diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index c529408366d84c..741cfba14f31d5 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -2,306 +2,306 @@ ; RUN: opt < %s -S -passes=openmp-opt-cgscc -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -%struct.omp_lock_t = type { i8* } -%struct.omp_nest_lock_t = type { i8* } -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.omp_lock_t = type { ptr } +%struct.omp_nest_lock_t = type { ptr } +%struct.ident_t = type { i32, i32, i32, i32, ptr } -define void @call_all(i32 %schedule, %struct.omp_lock_t* %lock, i32 %lock_hint, %struct.omp_nest_lock_t* %nest_lock, i32 %i, i8* %s, i64 %st, i8* %vp, double %d, i32 %proc_bind, i64 %allocator_handle, i8* %cp, i64 %event_handle, i32 %pause_resource) { +define void @call_all(i32 %schedule, ptr %lock, i32 %lock_hint, ptr %nest_lock, i32 %i, ptr %s, i64 %st, ptr %vp, double %d, i32 %proc_bind, i64 %allocator_handle, ptr %cp, i64 %event_handle, i32 %pause_resource) { entry: %schedule.addr = alloca i32, align 4 - %lock.addr = alloca %struct.omp_lock_t*, align 8 + %lock.addr = alloca ptr, align 8 %lock_hint.addr = alloca i32, align 4 - %nest_lock.addr = alloca %struct.omp_nest_lock_t*, align 8 + %nest_lock.addr = alloca ptr, align 8 %i.addr = alloca i32, align 4 - %s.addr = alloca i8*, align 8 + %s.addr = alloca ptr, align 8 %st.addr = alloca i64, align 8 - %vp.addr = alloca i8*, align 8 + %vp.addr = alloca ptr, align 8 %d.addr = alloca double, align 8 %proc_bind.addr = alloca i32, align 4 %allocator_handle.addr = alloca i64, align 8 - %cp.addr = alloca i8*, align 8 + %cp.addr = alloca ptr, align 8 %event_handle.addr = alloca i64, align 8 %pause_resource.addr = alloca i32, align 4 - store i32 %schedule, i32* %schedule.addr, align 4 - store %struct.omp_lock_t* %lock, %struct.omp_lock_t** %lock.addr, align 8 - store i32 %lock_hint, i32* %lock_hint.addr, align 4 - store %struct.omp_nest_lock_t* %nest_lock, %struct.omp_nest_lock_t** %nest_lock.addr, align 8 - store i32 %i, i32* %i.addr, align 4 - store i8* %s, i8** %s.addr, align 8 - store i64 %st, i64* %st.addr, align 8 - store i8* %vp, i8** %vp.addr, align 8 - store double %d, double* %d.addr, align 8 - store i32 %proc_bind, i32* %proc_bind.addr, align 4 - store i64 %allocator_handle, i64* %allocator_handle.addr, align 8 - store i8* %cp, i8** %cp.addr, align 8 - store i64 %event_handle, i64* %event_handle.addr, align 8 - store i32 %pause_resource, i32* %pause_resource.addr, align 4 + store i32 %schedule, ptr %schedule.addr, align 4 + store ptr %lock, ptr %lock.addr, align 8 + store i32 %lock_hint, ptr %lock_hint.addr, align 4 + store ptr %nest_lock, ptr %nest_lock.addr, align 8 + store i32 %i, ptr %i.addr, align 4 + store ptr %s, ptr %s.addr, align 8 + store i64 %st, ptr %st.addr, align 8 + store ptr %vp, ptr %vp.addr, align 8 + store double %d, ptr %d.addr, align 8 + store i32 %proc_bind, ptr %proc_bind.addr, align 4 + store i64 %allocator_handle, ptr %allocator_handle.addr, align 8 + store ptr %cp, ptr %cp.addr, align 8 + store i64 %event_handle, ptr %event_handle.addr, align 8 + store i32 %pause_resource, ptr %pause_resource.addr, align 4 call void @omp_set_num_threads(i32 0) call void @omp_set_dynamic(i32 0) call void @omp_set_nested(i32 0) call void @omp_set_max_active_levels(i32 0) - %0 = load i32, i32* %schedule.addr, align 4 + %0 = load i32, ptr %schedule.addr, align 4 call void @omp_set_schedule(i32 %0, i32 0) %call = call i32 @omp_get_num_threads() - store i32 %call, i32* %i.addr, align 4 - %1 = load i32, i32* %i.addr, align 4 + store i32 %call, ptr %i.addr, align 4 + %1 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %1) %call1 = call i32 @omp_get_dynamic() - store i32 %call1, i32* %i.addr, align 4 - %2 = load i32, i32* %i.addr, align 4 + store i32 %call1, ptr %i.addr, align 4 + %2 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %2) %call2 = call i32 @omp_get_nested() - store i32 %call2, i32* %i.addr, align 4 - %3 = load i32, i32* %i.addr, align 4 + store i32 %call2, ptr %i.addr, align 4 + %3 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %3) %call3 = call i32 @omp_get_max_threads() - store i32 %call3, i32* %i.addr, align 4 - %4 = load i32, i32* %i.addr, align 4 + store i32 %call3, ptr %i.addr, align 4 + %4 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %4) %call4 = call i32 @omp_get_thread_num() - store i32 %call4, i32* %i.addr, align 4 - %5 = load i32, i32* %i.addr, align 4 + store i32 %call4, ptr %i.addr, align 4 + %5 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %5) %call5 = call i32 @omp_get_num_procs() - store i32 %call5, i32* %i.addr, align 4 - %6 = load i32, i32* %i.addr, align 4 + store i32 %call5, ptr %i.addr, align 4 + %6 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %6) %call6 = call i32 @omp_in_parallel() - store i32 %call6, i32* %i.addr, align 4 - %7 = load i32, i32* %i.addr, align 4 + store i32 %call6, ptr %i.addr, align 4 + %7 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %7) %call7 = call i32 @omp_in_final() - store i32 %call7, i32* %i.addr, align 4 - %8 = load i32, i32* %i.addr, align 4 + store i32 %call7, ptr %i.addr, align 4 + %8 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %8) %call8 = call i32 @omp_get_active_level() - store i32 %call8, i32* %i.addr, align 4 - %9 = load i32, i32* %i.addr, align 4 + store i32 %call8, ptr %i.addr, align 4 + %9 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %9) %call9 = call i32 @omp_get_level() - store i32 %call9, i32* %i.addr, align 4 - %10 = load i32, i32* %i.addr, align 4 + store i32 %call9, ptr %i.addr, align 4 + %10 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %10) %call10 = call i32 @omp_get_ancestor_thread_num(i32 0) - store i32 %call10, i32* %i.addr, align 4 - %11 = load i32, i32* %i.addr, align 4 + store i32 %call10, ptr %i.addr, align 4 + %11 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %11) %call11 = call i32 @omp_get_team_size(i32 0) - store i32 %call11, i32* %i.addr, align 4 - %12 = load i32, i32* %i.addr, align 4 + store i32 %call11, ptr %i.addr, align 4 + %12 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %12) %call12 = call i32 @omp_get_thread_limit() - store i32 %call12, i32* %i.addr, align 4 - %13 = load i32, i32* %i.addr, align 4 + store i32 %call12, ptr %i.addr, align 4 + %13 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %13) %call13 = call i32 @omp_get_max_active_levels() - store i32 %call13, i32* %i.addr, align 4 - %14 = load i32, i32* %i.addr, align 4 + store i32 %call13, ptr %i.addr, align 4 + %14 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %14) - call void @omp_get_schedule(i32* %schedule.addr, i32* %i.addr) + call void @omp_get_schedule(ptr %schedule.addr, ptr %i.addr) %call14 = call i32 @omp_get_max_task_priority() - store i32 %call14, i32* %i.addr, align 4 - %15 = load i32, i32* %i.addr, align 4 + store i32 %call14, ptr %i.addr, align 4 + %15 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %15) - %16 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8 - call void @omp_init_lock(%struct.omp_lock_t* %16) - %17 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8 - call void @omp_set_lock(%struct.omp_lock_t* %17) - %18 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8 - call void @omp_unset_lock(%struct.omp_lock_t* %18) - %19 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8 - call void @omp_destroy_lock(%struct.omp_lock_t* %19) - %20 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8 - %call15 = call i32 @omp_test_lock(%struct.omp_lock_t* %20) - store i32 %call15, i32* %i.addr, align 4 - %21 = load i32, i32* %i.addr, align 4 + %16 = load ptr, ptr %lock.addr, align 8 + call void @omp_init_lock(ptr %16) + %17 = load ptr, ptr %lock.addr, align 8 + call void @omp_set_lock(ptr %17) + %18 = load ptr, ptr %lock.addr, align 8 + call void @omp_unset_lock(ptr %18) + %19 = load ptr, ptr %lock.addr, align 8 + call void @omp_destroy_lock(ptr %19) + %20 = load ptr, ptr %lock.addr, align 8 + %call15 = call i32 @omp_test_lock(ptr %20) + store i32 %call15, ptr %i.addr, align 4 + %21 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %21) - %22 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8 - call void @omp_init_nest_lock(%struct.omp_nest_lock_t* %22) - %23 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8 - call void @omp_set_nest_lock(%struct.omp_nest_lock_t* %23) - %24 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8 - call void @omp_unset_nest_lock(%struct.omp_nest_lock_t* %24) - %25 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8 - call void @omp_destroy_nest_lock(%struct.omp_nest_lock_t* %25) - %26 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8 - %call16 = call i32 @omp_test_nest_lock(%struct.omp_nest_lock_t* %26) - store i32 %call16, i32* %i.addr, align 4 - %27 = load i32, i32* %i.addr, align 4 + %22 = load ptr, ptr %nest_lock.addr, align 8 + call void @omp_init_nest_lock(ptr %22) + %23 = load ptr, ptr %nest_lock.addr, align 8 + call void @omp_set_nest_lock(ptr %23) + %24 = load ptr, ptr %nest_lock.addr, align 8 + call void @omp_unset_nest_lock(ptr %24) + %25 = load ptr, ptr %nest_lock.addr, align 8 + call void @omp_destroy_nest_lock(ptr %25) + %26 = load ptr, ptr %nest_lock.addr, align 8 + %call16 = call i32 @omp_test_nest_lock(ptr %26) + store i32 %call16, ptr %i.addr, align 4 + %27 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %27) - %28 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8 - %29 = load i32, i32* %lock_hint.addr, align 4 - call void @omp_init_lock_with_hint(%struct.omp_lock_t* %28, i32 %29) - %30 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8 - %31 = load i32, i32* %lock_hint.addr, align 4 - call void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t* %30, i32 %31) + %28 = load ptr, ptr %lock.addr, align 8 + %29 = load i32, ptr %lock_hint.addr, align 4 + call void @omp_init_lock_with_hint(ptr %28, i32 %29) + %30 = load ptr, ptr %nest_lock.addr, align 8 + %31 = load i32, ptr %lock_hint.addr, align 4 + call void @omp_init_nest_lock_with_hint(ptr %30, i32 %31) %call17 = call double @omp_get_wtime() - store double %call17, double* %d.addr, align 8 - %32 = load double, double* %d.addr, align 8 + store double %call17, ptr %d.addr, align 8 + %32 = load double, ptr %d.addr, align 8 call void @use_double(double %32) %call18 = call double @omp_get_wtick() - store double %call18, double* %d.addr, align 8 - %33 = load double, double* %d.addr, align 8 + store double %call18, ptr %d.addr, align 8 + %33 = load double, ptr %d.addr, align 8 call void @use_double(double %33) %call19 = call i32 @omp_get_default_device() - store i32 %call19, i32* %i.addr, align 4 - %34 = load i32, i32* %i.addr, align 4 + store i32 %call19, ptr %i.addr, align 4 + %34 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %34) call void @omp_set_default_device(i32 0) %call20 = call i32 @omp_is_initial_device() - store i32 %call20, i32* %i.addr, align 4 - %35 = load i32, i32* %i.addr, align 4 + store i32 %call20, ptr %i.addr, align 4 + %35 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %35) %call21 = call i32 @omp_get_num_devices() - store i32 %call21, i32* %i.addr, align 4 - %36 = load i32, i32* %i.addr, align 4 + store i32 %call21, ptr %i.addr, align 4 + %36 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %36) %call22 = call i32 @omp_get_num_teams() - store i32 %call22, i32* %i.addr, align 4 - %37 = load i32, i32* %i.addr, align 4 + store i32 %call22, ptr %i.addr, align 4 + %37 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %37) %call23 = call i32 @omp_get_team_num() - store i32 %call23, i32* %i.addr, align 4 - %38 = load i32, i32* %i.addr, align 4 + store i32 %call23, ptr %i.addr, align 4 + %38 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %38) %call24 = call i32 @omp_get_cancellation() - store i32 %call24, i32* %i.addr, align 4 - %39 = load i32, i32* %i.addr, align 4 + store i32 %call24, ptr %i.addr, align 4 + %39 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %39) %call25 = call i32 @omp_get_initial_device() - store i32 %call25, i32* %i.addr, align 4 - %40 = load i32, i32* %i.addr, align 4 + store i32 %call25, ptr %i.addr, align 4 + %40 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %40) - %41 = load i64, i64* %st.addr, align 8 - %42 = load i32, i32* %i.addr, align 4 - %call26 = call i8* @omp_target_alloc(i64 %41, i32 %42) - store i8* %call26, i8** %vp.addr, align 8 - %43 = load i8*, i8** %vp.addr, align 8 - call void @use_voidptr(i8* %43) - %44 = load i8*, i8** %vp.addr, align 8 - %45 = load i32, i32* %i.addr, align 4 - call void @omp_target_free(i8* %44, i32 %45) - %46 = load i8*, i8** %vp.addr, align 8 - %47 = load i32, i32* %i.addr, align 4 - %call27 = call i32 @omp_target_is_present(i8* %46, i32 %47) - store i32 %call27, i32* %i.addr, align 4 - %48 = load i32, i32* %i.addr, align 4 + %41 = load i64, ptr %st.addr, align 8 + %42 = load i32, ptr %i.addr, align 4 + %call26 = call ptr @omp_target_alloc(i64 %41, i32 %42) + store ptr %call26, ptr %vp.addr, align 8 + %43 = load ptr, ptr %vp.addr, align 8 + call void @use_voidptr(ptr %43) + %44 = load ptr, ptr %vp.addr, align 8 + %45 = load i32, ptr %i.addr, align 4 + call void @omp_target_free(ptr %44, i32 %45) + %46 = load ptr, ptr %vp.addr, align 8 + %47 = load i32, ptr %i.addr, align 4 + %call27 = call i32 @omp_target_is_present(ptr %46, i32 %47) + store i32 %call27, ptr %i.addr, align 4 + %48 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %48) - %49 = load i8*, i8** %vp.addr, align 8 - %50 = load i8*, i8** %vp.addr, align 8 - %51 = load i64, i64* %st.addr, align 8 - %52 = load i64, i64* %st.addr, align 8 - %53 = load i64, i64* %st.addr, align 8 - %54 = load i32, i32* %i.addr, align 4 - %55 = load i32, i32* %i.addr, align 4 - %call28 = call i32 @omp_target_memcpy(i8* %49, i8* %50, i64 %51, i64 %52, i64 %53, i32 %54, i32 %55) - store i32 %call28, i32* %i.addr, align 4 - %56 = load i32, i32* %i.addr, align 4 + %49 = load ptr, ptr %vp.addr, align 8 + %50 = load ptr, ptr %vp.addr, align 8 + %51 = load i64, ptr %st.addr, align 8 + %52 = load i64, ptr %st.addr, align 8 + %53 = load i64, ptr %st.addr, align 8 + %54 = load i32, ptr %i.addr, align 4 + %55 = load i32, ptr %i.addr, align 4 + %call28 = call i32 @omp_target_memcpy(ptr %49, ptr %50, i64 %51, i64 %52, i64 %53, i32 %54, i32 %55) + store i32 %call28, ptr %i.addr, align 4 + %56 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %56) - %57 = load i8*, i8** %vp.addr, align 8 - %58 = load i8*, i8** %vp.addr, align 8 - %59 = load i64, i64* %st.addr, align 8 - %60 = load i64, i64* %st.addr, align 8 - %61 = load i32, i32* %i.addr, align 4 - %call29 = call i32 @omp_target_associate_ptr(i8* %57, i8* %58, i64 %59, i64 %60, i32 %61) - store i32 %call29, i32* %i.addr, align 4 - %62 = load i32, i32* %i.addr, align 4 + %57 = load ptr, ptr %vp.addr, align 8 + %58 = load ptr, ptr %vp.addr, align 8 + %59 = load i64, ptr %st.addr, align 8 + %60 = load i64, ptr %st.addr, align 8 + %61 = load i32, ptr %i.addr, align 4 + %call29 = call i32 @omp_target_associate_ptr(ptr %57, ptr %58, i64 %59, i64 %60, i32 %61) + store i32 %call29, ptr %i.addr, align 4 + %62 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %62) - %63 = load i8*, i8** %vp.addr, align 8 - %64 = load i32, i32* %i.addr, align 4 - %call30 = call i32 @omp_target_disassociate_ptr(i8* %63, i32 %64) - store i32 %call30, i32* %i.addr, align 4 - %65 = load i32, i32* %i.addr, align 4 + %63 = load ptr, ptr %vp.addr, align 8 + %64 = load i32, ptr %i.addr, align 4 + %call30 = call i32 @omp_target_disassociate_ptr(ptr %63, i32 %64) + store i32 %call30, ptr %i.addr, align 4 + %65 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %65) %call31 = call i32 @omp_get_device_num() - store i32 %call31, i32* %i.addr, align 4 - %66 = load i32, i32* %i.addr, align 4 + store i32 %call31, ptr %i.addr, align 4 + %66 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %66) %call32 = call i32 @omp_get_proc_bind() - store i32 %call32, i32* %proc_bind.addr, align 4 + store i32 %call32, ptr %proc_bind.addr, align 4 %call33 = call i32 @omp_get_num_places() - store i32 %call33, i32* %i.addr, align 4 - %67 = load i32, i32* %i.addr, align 4 + store i32 %call33, ptr %i.addr, align 4 + %67 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %67) %call34 = call i32 @omp_get_place_num_procs(i32 0) - store i32 %call34, i32* %i.addr, align 4 - %68 = load i32, i32* %i.addr, align 4 + store i32 %call34, ptr %i.addr, align 4 + %68 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %68) - %69 = load i32, i32* %i.addr, align 4 - call void @omp_get_place_proc_ids(i32 %69, i32* %i.addr) + %69 = load i32, ptr %i.addr, align 4 + call void @omp_get_place_proc_ids(i32 %69, ptr %i.addr) %call35 = call i32 @omp_get_place_num() - store i32 %call35, i32* %i.addr, align 4 - %70 = load i32, i32* %i.addr, align 4 + store i32 %call35, ptr %i.addr, align 4 + %70 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %70) %call36 = call i32 @omp_get_partition_num_places() - store i32 %call36, i32* %i.addr, align 4 - %71 = load i32, i32* %i.addr, align 4 + store i32 %call36, ptr %i.addr, align 4 + %71 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %71) - call void @omp_get_partition_place_nums(i32* %i.addr) - %72 = load i32, i32* %i.addr, align 4 - %73 = load i32, i32* %i.addr, align 4 - %74 = load i8*, i8** %vp.addr, align 8 - %call37 = call i32 @omp_control_tool(i32 %72, i32 %73, i8* %74) - store i32 %call37, i32* %i.addr, align 4 - %75 = load i32, i32* %i.addr, align 4 + call void @omp_get_partition_place_nums(ptr %i.addr) + %72 = load i32, ptr %i.addr, align 4 + %73 = load i32, ptr %i.addr, align 4 + %74 = load ptr, ptr %vp.addr, align 8 + %call37 = call i32 @omp_control_tool(i32 %72, i32 %73, ptr %74) + store i32 %call37, ptr %i.addr, align 4 + %75 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %75) - %76 = load i64, i64* %allocator_handle.addr, align 8 + %76 = load i64, ptr %allocator_handle.addr, align 8 call void @omp_destroy_allocator(i64 %76) - %77 = load i64, i64* %allocator_handle.addr, align 8 + %77 = load i64, ptr %allocator_handle.addr, align 8 call void @omp_set_default_allocator(i64 %77) %call38 = call i64 @omp_get_default_allocator() - store i64 %call38, i64* %allocator_handle.addr, align 8 - %78 = load i64, i64* %st.addr, align 8 - %79 = load i64, i64* %allocator_handle.addr, align 8 - %call39 = call i8* @omp_alloc(i64 %78, i64 %79) - store i8* %call39, i8** %vp.addr, align 8 - %80 = load i8*, i8** %vp.addr, align 8 - call void @use_voidptr(i8* %80) - %81 = load i8*, i8** %vp.addr, align 8 - %82 = load i64, i64* %allocator_handle.addr, align 8 - call void @omp_free(i8* %81, i64 %82) - %83 = load i64, i64* %st.addr, align 8 - %84 = load i64, i64* %allocator_handle.addr, align 8 - %call40 = call i8* @omp_alloc(i64 %83, i64 %84) - store i8* %call40, i8** %vp.addr, align 8 - %85 = load i8*, i8** %vp.addr, align 8 - call void @use_voidptr(i8* %85) - %86 = load i8*, i8** %vp.addr, align 8 - %87 = load i64, i64* %allocator_handle.addr, align 8 - call void @omp_free(i8* %86, i64 %87) - %88 = load i8*, i8** %s.addr, align 8 - call void @ompc_set_affinity_format(i8* %88) - %89 = load i8*, i8** %cp.addr, align 8 - %90 = load i64, i64* %st.addr, align 8 - %call41 = call i64 @ompc_get_affinity_format(i8* %89, i64 %90) - store i64 %call41, i64* %st.addr, align 8 - %91 = load i64, i64* %st.addr, align 8 + store i64 %call38, ptr %allocator_handle.addr, align 8 + %78 = load i64, ptr %st.addr, align 8 + %79 = load i64, ptr %allocator_handle.addr, align 8 + %call39 = call ptr @omp_alloc(i64 %78, i64 %79) + store ptr %call39, ptr %vp.addr, align 8 + %80 = load ptr, ptr %vp.addr, align 8 + call void @use_voidptr(ptr %80) + %81 = load ptr, ptr %vp.addr, align 8 + %82 = load i64, ptr %allocator_handle.addr, align 8 + call void @omp_free(ptr %81, i64 %82) + %83 = load i64, ptr %st.addr, align 8 + %84 = load i64, ptr %allocator_handle.addr, align 8 + %call40 = call ptr @omp_alloc(i64 %83, i64 %84) + store ptr %call40, ptr %vp.addr, align 8 + %85 = load ptr, ptr %vp.addr, align 8 + call void @use_voidptr(ptr %85) + %86 = load ptr, ptr %vp.addr, align 8 + %87 = load i64, ptr %allocator_handle.addr, align 8 + call void @omp_free(ptr %86, i64 %87) + %88 = load ptr, ptr %s.addr, align 8 + call void @ompc_set_affinity_format(ptr %88) + %89 = load ptr, ptr %cp.addr, align 8 + %90 = load i64, ptr %st.addr, align 8 + %call41 = call i64 @ompc_get_affinity_format(ptr %89, i64 %90) + store i64 %call41, ptr %st.addr, align 8 + %91 = load i64, ptr %st.addr, align 8 call void @use_sizet(i64 %91) - %92 = load i8*, i8** %s.addr, align 8 - call void @ompc_display_affinity(i8* %92) - %93 = load i8*, i8** %cp.addr, align 8 - %94 = load i64, i64* %st.addr, align 8 - %95 = load i8*, i8** %s.addr, align 8 - %call42 = call i64 @ompc_capture_affinity(i8* %93, i64 %94, i8* %95) - store i64 %call42, i64* %st.addr, align 8 - %96 = load i64, i64* %st.addr, align 8 + %92 = load ptr, ptr %s.addr, align 8 + call void @ompc_display_affinity(ptr %92) + %93 = load ptr, ptr %cp.addr, align 8 + %94 = load i64, ptr %st.addr, align 8 + %95 = load ptr, ptr %s.addr, align 8 + %call42 = call i64 @ompc_capture_affinity(ptr %93, i64 %94, ptr %95) + store i64 %call42, ptr %st.addr, align 8 + %96 = load i64, ptr %st.addr, align 8 call void @use_sizet(i64 %96) - %97 = load i64, i64* %event_handle.addr, align 8 + %97 = load i64, ptr %event_handle.addr, align 8 call void @omp_fulfill_event(i64 %97) - %98 = load i32, i32* %pause_resource.addr, align 4 - %99 = load i32, i32* %i.addr, align 4 + %98 = load i32, ptr %pause_resource.addr, align 4 + %99 = load i32, ptr %i.addr, align 4 %call43 = call i32 @omp_pause_resource(i32 %98, i32 %99) - store i32 %call43, i32* %i.addr, align 4 - %100 = load i32, i32* %i.addr, align 4 + store i32 %call43, ptr %i.addr, align 4 + %100 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %100) - %101 = load i32, i32* %pause_resource.addr, align 4 + %101 = load i32, ptr %pause_resource.addr, align 4 %call44 = call i32 @omp_pause_resource_all(i32 %101) - store i32 %call44, i32* %i.addr, align 4 - %102 = load i32, i32* %i.addr, align 4 + store i32 %call44, ptr %i.addr, align 4 + %102 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %102) %call45 = call i32 @omp_get_supported_active_levels() - store i32 %call45, i32* %i.addr, align 4 - %103 = load i32, i32* %i.addr, align 4 + store i32 %call45, ptr %i.addr, align 4 + %103 = load i32, ptr %i.addr, align 4 call void @use_int(i32 %103) ret void } @@ -346,33 +346,33 @@ declare dso_local i32 @omp_get_thread_limit() declare dso_local i32 @omp_get_max_active_levels() -declare dso_local void @omp_get_schedule(i32*, i32*) +declare dso_local void @omp_get_schedule(ptr, ptr) declare dso_local i32 @omp_get_max_task_priority() -declare dso_local void @omp_init_lock(%struct.omp_lock_t*) +declare dso_local void @omp_init_lock(ptr) -declare dso_local void @omp_set_lock(%struct.omp_lock_t*) +declare dso_local void @omp_set_lock(ptr) -declare dso_local void @omp_unset_lock(%struct.omp_lock_t*) +declare dso_local void @omp_unset_lock(ptr) -declare dso_local void @omp_destroy_lock(%struct.omp_lock_t*) +declare dso_local void @omp_destroy_lock(ptr) -declare dso_local i32 @omp_test_lock(%struct.omp_lock_t*) +declare dso_local i32 @omp_test_lock(ptr) -declare dso_local void @omp_init_nest_lock(%struct.omp_nest_lock_t*) +declare dso_local void @omp_init_nest_lock(ptr) -declare dso_local void @omp_set_nest_lock(%struct.omp_nest_lock_t*) +declare dso_local void @omp_set_nest_lock(ptr) -declare dso_local void @omp_unset_nest_lock(%struct.omp_nest_lock_t*) +declare dso_local void @omp_unset_nest_lock(ptr) -declare dso_local void @omp_destroy_nest_lock(%struct.omp_nest_lock_t*) +declare dso_local void @omp_destroy_nest_lock(ptr) -declare dso_local i32 @omp_test_nest_lock(%struct.omp_nest_lock_t*) +declare dso_local i32 @omp_test_nest_lock(ptr) -declare dso_local void @omp_init_lock_with_hint(%struct.omp_lock_t*, i32) +declare dso_local void @omp_init_lock_with_hint(ptr, i32) -declare dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32) +declare dso_local void @omp_init_nest_lock_with_hint(ptr, i32) declare dso_local double @omp_get_wtime() @@ -396,19 +396,19 @@ declare dso_local i32 @omp_get_cancellation() declare dso_local i32 @omp_get_initial_device() -declare dso_local i8* @omp_target_alloc(i64, i32) +declare dso_local ptr @omp_target_alloc(i64, i32) -declare dso_local void @use_voidptr(i8*) +declare dso_local void @use_voidptr(ptr) -declare dso_local void @omp_target_free(i8*, i32) +declare dso_local void @omp_target_free(ptr, i32) -declare dso_local i32 @omp_target_is_present(i8*, i32) +declare dso_local i32 @omp_target_is_present(ptr, i32) -declare dso_local i32 @omp_target_memcpy(i8*, i8*, i64, i64, i64, i32, i32) +declare dso_local i32 @omp_target_memcpy(ptr, ptr, i64, i64, i64, i32, i32) -declare dso_local i32 @omp_target_associate_ptr(i8*, i8*, i64, i64, i32) +declare dso_local i32 @omp_target_associate_ptr(ptr, ptr, i64, i64, i32) -declare dso_local i32 @omp_target_disassociate_ptr(i8*, i32) +declare dso_local i32 @omp_target_disassociate_ptr(ptr, i32) declare dso_local i32 @omp_get_device_num() @@ -418,15 +418,15 @@ declare dso_local i32 @omp_get_num_places() declare dso_local i32 @omp_get_place_num_procs(i32) -declare dso_local void @omp_get_place_proc_ids(i32, i32*) +declare dso_local void @omp_get_place_proc_ids(i32, ptr) declare dso_local i32 @omp_get_place_num() declare dso_local i32 @omp_get_partition_num_places() -declare dso_local void @omp_get_partition_place_nums(i32*) +declare dso_local void @omp_get_partition_place_nums(ptr) -declare dso_local i32 @omp_control_tool(i32, i32, i8*) +declare dso_local i32 @omp_control_tool(i32, i32, ptr) declare dso_local void @omp_destroy_allocator(i64) @@ -434,19 +434,19 @@ declare dso_local void @omp_set_default_allocator(i64) declare dso_local i64 @omp_get_default_allocator() -declare dso_local i8* @omp_alloc(i64, i64) +declare dso_local ptr @omp_alloc(i64, i64) -declare dso_local void @omp_free(i8*, i64) +declare dso_local void @omp_free(ptr, i64) -declare dso_local void @ompc_set_affinity_format(i8*) +declare dso_local void @ompc_set_affinity_format(ptr) -declare dso_local i64 @ompc_get_affinity_format(i8*, i64) +declare dso_local i64 @ompc_get_affinity_format(ptr, i64) declare dso_local void @use_sizet(i64) -declare dso_local void @ompc_display_affinity(i8*) +declare dso_local void @ompc_display_affinity(ptr) -declare dso_local i64 @ompc_capture_affinity(i8*, i64, i8*) +declare dso_local i64 @ompc_capture_affinity(ptr, i64, ptr) declare dso_local void @omp_fulfill_event(i64) @@ -456,216 +456,216 @@ declare dso_local i32 @omp_pause_resource_all(i32) declare dso_local i32 @omp_get_supported_active_levels() -declare void @__kmpc_barrier(%struct.ident_t*, i32) +declare void @__kmpc_barrier(ptr, i32) -declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) +declare i32 @__kmpc_cancel(ptr, i32, i32) -declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) +declare i32 @__kmpc_cancel_barrier(ptr, i32) -declare void @__kmpc_flush(%struct.ident_t*) +declare void @__kmpc_flush(ptr) -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +declare i32 @__kmpc_global_thread_num(ptr) -declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +declare void @__kmpc_fork_call(ptr, i32, ptr, ...) -declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) +declare i32 @__kmpc_omp_taskwait(ptr, i32) -declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) +declare i32 @__kmpc_omp_taskyield(ptr, i32, i32) -declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) +declare void @__kmpc_push_num_threads(ptr, i32, i32) -declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) +declare void @__kmpc_push_proc_bind(ptr, i32, i32) -declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) +declare void @__kmpc_serialized_parallel(ptr, i32) -declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) +declare void @__kmpc_end_serialized_parallel(ptr, i32) -declare i32 @__kmpc_master(%struct.ident_t*, i32) +declare i32 @__kmpc_master(ptr, i32) -declare void @__kmpc_end_master(%struct.ident_t*, i32) +declare void @__kmpc_end_master(ptr, i32) -declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) +declare void @__kmpc_critical(ptr, i32, ptr) -declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) +declare void @__kmpc_critical_with_hint(ptr, i32, ptr, i32) -declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) +declare void @__kmpc_end_critical(ptr, i32, ptr) -declare void @__kmpc_begin(%struct.ident_t*, i32) +declare void @__kmpc_begin(ptr, i32) -declare void @__kmpc_end(%struct.ident_t*) +declare void @__kmpc_end(ptr) -declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +declare i32 @__kmpc_reduce(ptr, i32, i32, i64, ptr, ptr, ptr) -declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +declare i32 @__kmpc_reduce_nowait(ptr, i32, i32, i64, ptr, ptr, ptr) -declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) +declare void @__kmpc_end_reduce(ptr, i32, ptr) -declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) +declare void @__kmpc_end_reduce_nowait(ptr, i32, ptr) -declare void @__kmpc_ordered(%struct.ident_t*, i32) +declare void @__kmpc_ordered(ptr, i32) -declare void @__kmpc_end_ordered(%struct.ident_t*, i32) +declare void @__kmpc_end_ordered(ptr, i32) -declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +declare void @__kmpc_for_static_init_4(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32) -declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +declare void @__kmpc_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32) -declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +declare void @__kmpc_for_static_init_8(ptr, i32, i32, ptr, ptr, ptr, ptr, i64, i64) -declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +declare void @__kmpc_for_static_init_8u(ptr, i32, i32, ptr, ptr, ptr, ptr, i64, i64) -declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) +declare void @__kmpc_for_static_fini(ptr, i32) -declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +declare void @__kmpc_team_static_init_4(ptr, i32, ptr, ptr, ptr, ptr, i32, i32) -declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +declare void @__kmpc_team_static_init_4u(ptr, i32, ptr, ptr, ptr, ptr, i32, i32) -declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +declare void @__kmpc_team_static_init_8(ptr, i32, ptr, ptr, ptr, ptr, i64, i64) -declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +declare void @__kmpc_team_static_init_8u(ptr, i32, ptr, ptr, ptr, ptr, i64, i64) -declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +declare void @__kmpc_dist_for_static_init_4(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32) -declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +declare void @__kmpc_dist_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32) -declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +declare void @__kmpc_dist_for_static_init_8(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i64, i64) -declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +declare void @__kmpc_dist_for_static_init_8u(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i64, i64) -declare i32 @__kmpc_single(%struct.ident_t*, i32) +declare i32 @__kmpc_single(ptr, i32) -declare void @__kmpc_end_single(%struct.ident_t*, i32) +declare void @__kmpc_end_single(ptr, i32) -declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) +declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) -declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) +declare i32 @__kmpc_omp_task(ptr, i32, ptr) -declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) +declare void @__kmpc_end_taskgroup(ptr, i32) -declare void @__kmpc_taskgroup(%struct.ident_t*, i32) +declare void @__kmpc_taskgroup(ptr, i32) -declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +declare void @__kmpc_dist_dispatch_init_4(ptr, i32, i32, ptr, i32, i32, i32, i32) -declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +declare void @__kmpc_dist_dispatch_init_4u(ptr, i32, i32, ptr, i32, i32, i32, i32) -declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +declare void @__kmpc_dist_dispatch_init_8(ptr, i32, i32, ptr, i64, i64, i64, i64) -declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +declare void @__kmpc_dist_dispatch_init_8u(ptr, i32, i32, ptr, i64, i64, i64, i64) -declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +declare void @__kmpc_dispatch_init_4(ptr, i32, i32, i32, i32, i32, i32) -declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +declare void @__kmpc_dispatch_init_4u(ptr, i32, i32, i32, i32, i32, i32) -declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +declare void @__kmpc_dispatch_init_8(ptr, i32, i32, i64, i64, i64, i64) -declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +declare void @__kmpc_dispatch_init_8u(ptr, i32, i32, i64, i64, i64, i64) -declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +declare i32 @__kmpc_dispatch_next_4(ptr, i32, ptr, ptr, ptr, ptr) -declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +declare i32 @__kmpc_dispatch_next_4u(ptr, i32, ptr, ptr, ptr, ptr) -declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +declare i32 @__kmpc_dispatch_next_8(ptr, i32, ptr, ptr, ptr, ptr) -declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +declare i32 @__kmpc_dispatch_next_8u(ptr, i32, ptr, ptr, ptr, ptr) -declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) +declare void @__kmpc_dispatch_fini_4(ptr, i32) -declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) +declare void @__kmpc_dispatch_fini_4u(ptr, i32) -declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) +declare void @__kmpc_dispatch_fini_8(ptr, i32) -declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) +declare void @__kmpc_dispatch_fini_8u(ptr, i32) -declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) +declare void @__kmpc_omp_task_begin_if0(ptr, i32, ptr) -declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) +declare void @__kmpc_omp_task_complete_if0(ptr, i32, ptr) -declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) +declare i32 @__kmpc_omp_task_with_deps(ptr, i32, ptr, i32, ptr, i32, ptr) -declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) +declare void @__kmpc_omp_wait_deps(ptr, i32, i32, ptr, i32, ptr) -declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) +declare i32 @__kmpc_cancellationpoint(ptr, i32, i32) -declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) +declare void @__kmpc_push_num_teams(ptr, i32, i32, i32) -declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +declare void @__kmpc_fork_teams(ptr, i32, ptr, ...) -declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) +declare void @__kmpc_taskloop(ptr, i32, ptr, i32, ptr, ptr, i64, i32, i32, i64, ptr) -declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) +declare ptr @__kmpc_omp_target_task_alloc(ptr, i32, i32, i64, i64, ptr, i64) -declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) +declare ptr @__kmpc_taskred_modifier_init(ptr, i32, i32, i32, ptr) -declare i8* @__kmpc_taskred_init(i32, i32, i8*) +declare ptr @__kmpc_taskred_init(i32, i32, ptr) -declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) +declare void @__kmpc_task_reduction_modifier_fini(ptr, i32, i32) -declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) +declare void @__kmpc_copyprivate(ptr, i32, i64, ptr, ptr, i32) -declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) +declare ptr @__kmpc_threadprivate_cached(ptr, i32, ptr, i64, ptr) -declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) +declare void @__kmpc_threadprivate_register(ptr, ptr, ptr, ptr, ptr) -declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) +declare void @__kmpc_doacross_init(ptr, i32, i32, ptr) -declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) +declare void @__kmpc_doacross_wait(ptr, i32, ptr) -declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) +declare void @__kmpc_doacross_post(ptr, i32, ptr) -declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) +declare void @__kmpc_doacross_fini(ptr, i32) -declare i8* @__kmpc_alloc(i32, i64, i8*) +declare ptr @__kmpc_alloc(i32, i64, ptr) -declare void @__kmpc_free(i32, i8*, i8*) +declare void @__kmpc_free(i32, ptr, ptr) -declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) +declare ptr @__kmpc_init_allocator(i32, ptr, i32, ptr) -declare void @__kmpc_destroy_allocator(i32, i8*) +declare void @__kmpc_destroy_allocator(i32, ptr) -declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) +declare void @__kmpc_push_target_tripcount_mapper(ptr, i64, i64) declare i64 @__kmpc_warp_active_thread_mask() declare void @__kmpc_syncwarp(i64) -declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**) +declare i32 @__tgt_target_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr) -declare i32 @__tgt_target_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i8*, i32, i8*) +declare i32 @__tgt_target_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, i32, ptr) -declare i32 @__tgt_target_teams_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32) +declare i32 @__tgt_target_teams_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32) -declare i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32, i32, i8*, i32, i8*) +declare i32 @__tgt_target_teams_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, ptr, i32, ptr) declare void @__tgt_register_requires(i64) -declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) -declare void @__tgt_target_data_begin_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +declare void @__tgt_target_data_begin_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) -declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) -declare void @__tgt_target_data_end_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +declare void @__tgt_target_data_end_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) -declare void @__tgt_target_data_update_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +declare void @__tgt_target_data_update_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) -declare void @__tgt_target_data_update_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +declare void @__tgt_target_data_update_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) -declare i64 @__tgt_mapper_num_components(i8*) +declare i64 @__tgt_mapper_num_components(ptr) -declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64, i8*) +declare void @__tgt_push_mapper_component(ptr, ptr, ptr, i64, i64, ptr) -declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) +declare ptr @__kmpc_task_allow_completion_event(ptr, i32, ptr) -declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) +declare ptr @__kmpc_task_reduction_get_th_data(i32, ptr, ptr) -declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) +declare ptr @__kmpc_task_reduction_init(i32, i32, ptr) -declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) +declare ptr @__kmpc_task_reduction_modifier_init(ptr, i32, i32, i32, ptr) -declare void @__kmpc_proxy_task_completed_ooo(i8*) +declare void @__kmpc_proxy_task_completed_ooo(ptr) ; Function Attrs: noinline cold -declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32) #0 +declare void @__kmpc_barrier_simple_spmd(ptr nocapture nofree readonly, i32) #0 attributes #0 = { noinline cold } @@ -730,46 +730,46 @@ attributes #0 = { noinline cold } ; CHECK-NEXT: declare dso_local i32 @omp_get_max_active_levels() ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) +; CHECK-NEXT: declare dso_local void @omp_get_schedule(ptr nocapture writeonly, ptr nocapture writeonly) ; CHECK-NOT: Function Attrs ; CHECK: declare dso_local i32 @omp_get_max_task_priority() ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_init_lock(%struct.omp_lock_t*) +; CHECK: declare dso_local void @omp_init_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_set_lock(%struct.omp_lock_t*) +; CHECK: declare dso_local void @omp_set_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_unset_lock(%struct.omp_lock_t*) +; CHECK: declare dso_local void @omp_unset_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_destroy_lock(%struct.omp_lock_t*) +; CHECK: declare dso_local void @omp_destroy_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i32 @omp_test_lock(%struct.omp_lock_t*) +; CHECK: declare dso_local i32 @omp_test_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_init_nest_lock(%struct.omp_nest_lock_t*) +; CHECK: declare dso_local void @omp_init_nest_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_set_nest_lock(%struct.omp_nest_lock_t*) +; CHECK: declare dso_local void @omp_set_nest_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_unset_nest_lock(%struct.omp_nest_lock_t*) +; CHECK: declare dso_local void @omp_unset_nest_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_destroy_nest_lock(%struct.omp_nest_lock_t*) +; CHECK: declare dso_local void @omp_destroy_nest_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i32 @omp_test_nest_lock(%struct.omp_nest_lock_t*) +; CHECK: declare dso_local i32 @omp_test_nest_lock(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_init_lock_with_hint(%struct.omp_lock_t*, i32) +; CHECK: declare dso_local void @omp_init_lock_with_hint(ptr, i32) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32) +; CHECK: declare dso_local void @omp_init_nest_lock_with_hint(ptr, i32) ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare dso_local double @omp_get_wtime() @@ -805,25 +805,25 @@ attributes #0 = { noinline cold } ; CHECK: declare dso_local i32 @omp_get_initial_device() ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i8* @omp_target_alloc(i64, i32) +; CHECK: declare dso_local ptr @omp_target_alloc(i64, i32) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @use_voidptr(i8*) +; CHECK: declare dso_local void @use_voidptr(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_target_free(i8*, i32) +; CHECK: declare dso_local void @omp_target_free(ptr, i32) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i32 @omp_target_is_present(i8*, i32) +; CHECK: declare dso_local i32 @omp_target_is_present(ptr, i32) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i32 @omp_target_memcpy(i8*, i8*, i64, i64, i64, i32, i32) +; CHECK: declare dso_local i32 @omp_target_memcpy(ptr, ptr, i64, i64, i64, i32, i32) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i32 @omp_target_associate_ptr(i8*, i8*, i64, i64, i32) +; CHECK: declare dso_local i32 @omp_target_associate_ptr(ptr, ptr, i64, i64, i32) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i32 @omp_target_disassociate_ptr(i8*, i32) +; CHECK: declare dso_local i32 @omp_target_disassociate_ptr(ptr, i32) ; CHECK-NOT: Function Attrs ; CHECK: declare dso_local i32 @omp_get_device_num() @@ -838,7 +838,7 @@ attributes #0 = { noinline cold } ; CHECK: declare dso_local i32 @omp_get_place_num_procs(i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) +; CHECK-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, ptr nocapture writeonly) ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare dso_local i32 @omp_get_place_num() @@ -847,10 +847,10 @@ attributes #0 = { noinline cold } ; CHECK-NEXT: declare dso_local i32 @omp_get_partition_num_places() ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) +; CHECK-NEXT: declare dso_local void @omp_get_partition_place_nums(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i32 @omp_control_tool(i32, i32, i8*) +; CHECK: declare dso_local i32 @omp_control_tool(i32, i32, ptr) ; CHECK-NOT: Function Attrs ; CHECK: declare dso_local void @omp_destroy_allocator(i64) @@ -862,25 +862,25 @@ attributes #0 = { noinline cold } ; CHECK: declare dso_local i64 @omp_get_default_allocator() ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i8* @omp_alloc(i64, i64) +; CHECK: declare dso_local ptr @omp_alloc(i64, i64) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @omp_free(i8*, i64) +; CHECK: declare dso_local void @omp_free(ptr, i64) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @ompc_set_affinity_format(i8*) +; CHECK: declare dso_local void @ompc_set_affinity_format(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i64 @ompc_get_affinity_format(i8*, i64) +; CHECK: declare dso_local i64 @ompc_get_affinity_format(ptr, i64) ; CHECK-NOT: Function Attrs ; CHECK: declare dso_local void @use_sizet(i64) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local void @ompc_display_affinity(i8*) +; CHECK: declare dso_local void @ompc_display_affinity(ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare dso_local i64 @ompc_capture_affinity(i8*, i64, i8*) +; CHECK: declare dso_local i64 @ompc_capture_affinity(ptr, i64, ptr) ; CHECK-NOT: Function Attrs ; CHECK: declare dso_local void @omp_fulfill_event(i64) @@ -895,256 +895,256 @@ attributes #0 = { noinline cold } ; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_barrier(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare i32 @__kmpc_cancel(ptr, i32, i32) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) +; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(ptr, i32) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) +; CHECK-NEXT: declare void @__kmpc_flush(ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +; CHECK-NEXT: declare void @__kmpc_fork_call(ptr, i32, ptr, ...) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) +; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_push_num_threads(ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_push_proc_bind(ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_serialized_parallel(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) +; CHECK-NEXT: declare i32 @__kmpc_master(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_end_master(ptr, i32) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) +; CHECK-NEXT: declare void @__kmpc_critical(ptr, i32, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) +; CHECK-NEXT: declare void @__kmpc_critical_with_hint(ptr, i32, ptr, i32) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) +; CHECK-NEXT: declare void @__kmpc_end_critical(ptr, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_begin(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) +; CHECK-NEXT: declare void @__kmpc_end(ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +; CHECK-NEXT: declare i32 @__kmpc_reduce(ptr, i32, i32, i64, ptr, ptr, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(ptr, i32, i32, i64, ptr, ptr, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) +; CHECK-NEXT: declare void @__kmpc_end_reduce(ptr, i32, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) +; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(ptr, i32, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_ordered(ptr, i32) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_end_ordered(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_for_static_init_4(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_for_static_init_8(ptr, i32, i32, ptr, ptr, ptr, ptr, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(ptr, i32, i32, ptr, ptr, ptr, ptr, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_for_static_fini(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_team_static_init_4(ptr, i32, ptr, ptr, ptr, ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(ptr, i32, ptr, ptr, ptr, ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_team_static_init_8(ptr, i32, ptr, ptr, ptr, ptr, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(ptr, i32, ptr, ptr, ptr, ptr, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i64, i64) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) +; CHECK-NEXT: declare i32 @__kmpc_single(ptr, i32) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_end_single(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) +; CHECK-NEXT: declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare i32 @__kmpc_omp_task(ptr, i32, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_end_taskgroup(ptr, i32) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_taskgroup(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(ptr, i32, i32, ptr, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(ptr, i32, i32, ptr, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(ptr, i32, i32, ptr, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(ptr, i32, i32, ptr, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(ptr, i32, i32, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(ptr, i32, i32, i32, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(ptr, i32, i32, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(ptr, i32, i32, i64, i64, i64, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(ptr, i32, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(ptr, i32, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(ptr, i32, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(ptr, i32, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(ptr, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(ptr, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) +; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(ptr, i32, ptr, i32, ptr, i32, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) +; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(ptr, i32, i32, ptr, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) +; CHECK-NEXT: declare void @__kmpc_push_num_teams(ptr, i32, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +; CHECK-NEXT: declare void @__kmpc_fork_teams(ptr, i32, ptr, ...) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) +; CHECK-NEXT: declare void @__kmpc_taskloop(ptr, i32, ptr, i32, ptr, ptr, i64, i32, i32, i64, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) +; CHECK-NEXT: declare ptr @__kmpc_omp_target_task_alloc(ptr, i32, i32, i64, i64, ptr, i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) +; CHECK-NEXT: declare ptr @__kmpc_taskred_modifier_init(ptr, i32, i32, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) +; CHECK-NEXT: declare ptr @__kmpc_taskred_init(i32, i32, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) +; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) +; CHECK-NEXT: declare void @__kmpc_copyprivate(ptr, i32, i64, ptr, ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) +; CHECK-NEXT: declare ptr @__kmpc_threadprivate_cached(ptr, i32, ptr, i64, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) +; CHECK-NEXT: declare void @__kmpc_threadprivate_register(ptr, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) +; CHECK-NEXT: declare void @__kmpc_doacross_init(ptr, i32, i32, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) +; CHECK-NEXT: declare void @__kmpc_doacross_wait(ptr, i32, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) +; CHECK-NEXT: declare void @__kmpc_doacross_post(ptr, i32, ptr) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) +; CHECK-NEXT: declare void @__kmpc_doacross_fini(ptr, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) +; CHECK-NEXT: declare ptr @__kmpc_alloc(i32, i64, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) +; CHECK-NEXT: declare void @__kmpc_free(i32, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) +; CHECK-NEXT: declare ptr @__kmpc_init_allocator(i32, ptr, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) +; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(ptr, i64, i64) ; CHECK: ; Function Attrs: convergent nounwind ; CHECK-NEXT: declare i64 @__kmpc_warp_active_thread_mask() @@ -1153,61 +1153,61 @@ attributes #0 = { noinline cold } ; CHECK-NEXT: declare void @__kmpc_syncwarp(i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; CHECK-NEXT: declare i32 @__tgt_target_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i8*, i32, i8*) +; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32) +; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32, i32, i8*, i32, i8*) +; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, ptr, i32, ptr) ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare void @__tgt_register_requires(i64) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) +; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64, i8*) +; CHECK-NEXT: declare void @__tgt_push_mapper_component(ptr, ptr, ptr, i64, i64, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) +; CHECK-NEXT: declare ptr @__kmpc_task_allow_completion_event(ptr, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) +; CHECK-NEXT: declare ptr @__kmpc_task_reduction_get_th_data(i32, ptr, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) +; CHECK-NEXT: declare ptr @__kmpc_task_reduction_init(i32, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) +; CHECK-NEXT: declare ptr @__kmpc_task_reduction_modifier_init(ptr, i32, i32, i32, ptr) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) +; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(ptr) ; CHECK: ; Function Attrs: cold convergent noinline nounwind -; CHECK-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32) +; CHECK-NEXT: declare void @__kmpc_barrier_simple_spmd(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) @@ -1270,46 +1270,46 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(ptr nocapture writeonly, ptr nocapture writeonly) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_max_task_priority() ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_init_lock(%struct.omp_lock_t*) +; OPTIMISTIC: declare dso_local void @omp_init_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_set_lock(%struct.omp_lock_t*) +; OPTIMISTIC: declare dso_local void @omp_set_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_unset_lock(%struct.omp_lock_t*) +; OPTIMISTIC: declare dso_local void @omp_unset_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_destroy_lock(%struct.omp_lock_t*) +; OPTIMISTIC: declare dso_local void @omp_destroy_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i32 @omp_test_lock(%struct.omp_lock_t*) +; OPTIMISTIC: declare dso_local i32 @omp_test_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_init_nest_lock(%struct.omp_nest_lock_t*) +; OPTIMISTIC: declare dso_local void @omp_init_nest_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_set_nest_lock(%struct.omp_nest_lock_t*) +; OPTIMISTIC: declare dso_local void @omp_set_nest_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_unset_nest_lock(%struct.omp_nest_lock_t*) +; OPTIMISTIC: declare dso_local void @omp_unset_nest_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_destroy_nest_lock(%struct.omp_nest_lock_t*) +; OPTIMISTIC: declare dso_local void @omp_destroy_nest_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i32 @omp_test_nest_lock(%struct.omp_nest_lock_t*) +; OPTIMISTIC: declare dso_local i32 @omp_test_nest_lock(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_init_lock_with_hint(%struct.omp_lock_t*, i32) +; OPTIMISTIC: declare dso_local void @omp_init_lock_with_hint(ptr, i32) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32) +; OPTIMISTIC: declare dso_local void @omp_init_nest_lock_with_hint(ptr, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare dso_local double @omp_get_wtime() @@ -1345,25 +1345,25 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: declare dso_local i32 @omp_get_initial_device() ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i8* @omp_target_alloc(i64, i32) +; OPTIMISTIC: declare dso_local ptr @omp_target_alloc(i64, i32) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @use_voidptr(i8*) +; OPTIMISTIC: declare dso_local void @use_voidptr(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_target_free(i8*, i32) +; OPTIMISTIC: declare dso_local void @omp_target_free(ptr, i32) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i32 @omp_target_is_present(i8*, i32) +; OPTIMISTIC: declare dso_local i32 @omp_target_is_present(ptr, i32) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i32 @omp_target_memcpy(i8*, i8*, i64, i64, i64, i32, i32) +; OPTIMISTIC: declare dso_local i32 @omp_target_memcpy(ptr, ptr, i64, i64, i64, i32, i32) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i32 @omp_target_associate_ptr(i8*, i8*, i64, i64, i32) +; OPTIMISTIC: declare dso_local i32 @omp_target_associate_ptr(ptr, ptr, i64, i64, i32) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i32 @omp_target_disassociate_ptr(i8*, i32) +; OPTIMISTIC: declare dso_local i32 @omp_target_disassociate_ptr(ptr, i32) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_device_num() @@ -1378,7 +1378,7 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, ptr nocapture writeonly) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() @@ -1387,10 +1387,10 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) -; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) +; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, i8*) +; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, ptr) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @omp_destroy_allocator(i64) @@ -1402,25 +1402,25 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: declare dso_local i64 @omp_get_default_allocator() ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i8* @omp_alloc(i64, i64) +; OPTIMISTIC: declare dso_local ptr @omp_alloc(i64, i64) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @omp_free(i8*, i64) +; OPTIMISTIC: declare dso_local void @omp_free(ptr, i64) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @ompc_set_affinity_format(i8*) +; OPTIMISTIC: declare dso_local void @ompc_set_affinity_format(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i64 @ompc_get_affinity_format(i8*, i64) +; OPTIMISTIC: declare dso_local i64 @ompc_get_affinity_format(ptr, i64) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_sizet(i64) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local void @ompc_display_affinity(i8*) +; OPTIMISTIC: declare dso_local void @ompc_display_affinity(ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare dso_local i64 @ompc_capture_affinity(i8*, i64, i8*) +; OPTIMISTIC: declare dso_local i64 @ompc_capture_affinity(ptr, i64, ptr) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @omp_fulfill_event(i64) @@ -1435,244 +1435,244 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) -; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(ptr nocapture nofree readonly) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) +; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly, ...) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t* nocapture nofree readonly, i32, i32) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskyield(ptr nocapture nofree readonly, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t* nocapture nofree readonly, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_threads(ptr nocapture nofree readonly, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t* nocapture nofree readonly, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_push_proc_bind(ptr nocapture nofree readonly, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_serialized_parallel(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_end_serialized_parallel(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare i32 @__kmpc_master(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_master(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) +; OPTIMISTIC-NEXT: declare void @__kmpc_critical(ptr nocapture nofree readonly, i32, ptr) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(ptr nocapture nofree readonly, i32, ptr, i32) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) +; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(ptr nocapture nofree readonly, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_begin(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_begin(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly) +; OPTIMISTIC-NEXT: declare void @__kmpc_end(ptr nocapture nofree readonly) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(ptr nocapture nofree readonly, i32, i32, i64, ptr nocapture nofree readonly, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(ptr nocapture nofree readonly, i32, i32, i64, ptr nocapture nofree readonly, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) +; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(ptr nocapture nofree readonly, i32, ptr) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) +; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(ptr nocapture nofree readonly, i32, ptr) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_fini(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4u(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8u(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_task_alloc(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_omp_task_alloc(ptr nocapture nofree readonly, i32, i32, i64, i64, ptr nocapture nofree readonly) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(ptr nocapture nofree readonly, i32, ptr) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, i32, i32, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, i32, i32, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, i64, i64, i64, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, i64, i64, i64, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4(ptr nocapture nofree readonly, i32, i32, i32, i32, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4u(ptr nocapture nofree readonly, i32, i32, i32, i32, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8(ptr nocapture nofree readonly, i32, i32, i64, i64, i64, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8u(ptr nocapture nofree readonly, i32, i32, i64, i64, i64, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4u(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8u(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4u(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8u(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*) +; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_begin_if0(ptr nocapture nofree readonly, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*) +; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_complete_if0(ptr nocapture nofree readonly, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(ptr nocapture nofree readonly, i32, ptr, i32, ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*) +; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree readonly, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t* nocapture nofree readonly, i32, i32) +; OPTIMISTIC-NEXT: declare i32 @__kmpc_cancellationpoint(ptr nocapture nofree readonly, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) -; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t* nocapture nofree readonly, i32, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_teams(ptr nocapture nofree readonly, i32, i32, i32) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) +; OPTIMISTIC-NEXT: declare void @__kmpc_fork_teams(ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly, ...) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_taskloop(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i64* nocapture nofree, i64* nocapture nofree, i64, i32, i32, i64, i8*) +; OPTIMISTIC-NEXT: declare void @__kmpc_taskloop(ptr nocapture nofree readonly, i32, ptr, i32, ptr nocapture nofree, ptr nocapture nofree, i64, i32, i32, i64, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly, i64) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_omp_target_task_alloc(ptr nocapture nofree readonly, i32, i32, i64, i64, ptr nocapture nofree readonly, i64) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_taskred_modifier_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i8*) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_taskred_modifier_init(ptr nocapture nofree readonly, i32, i32, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) +; OPTIMISTIC-NEXT: declare ptr @__kmpc_taskred_init(i32, i32, ptr) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(ptr nocapture nofree readonly, i32, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t* nocapture nofree readonly, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_copyprivate(ptr nocapture nofree readonly, i32, i64, ptr nocapture nofree readonly, ptr, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_threadprivate_cached(%struct.ident_t* nocapture nofree readonly, i32, i8*, i64, i8***) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_threadprivate_cached(ptr nocapture nofree readonly, i32, ptr, i64, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly) +; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(ptr nocapture nofree readonly, ptr, ptr nocapture nofree readonly, ptr nocapture nofree readonly, ptr nocapture nofree readonly) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*) +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(ptr nocapture nofree readonly, i32, i32, ptr) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(ptr nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_alloc(i32, i64, i8*) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_alloc(i32, i64, ptr) ; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_free(i32, i8*, i8*) +; OPTIMISTIC-NEXT: declare void @__kmpc_free(i32, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_init_allocator(i32, ptr, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) +; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) -; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(ptr, i64, i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i64 @__kmpc_warp_active_thread_mask() @@ -1681,61 +1681,61 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__tgt_target_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i8*, i32, i8*) +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32) +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32, i32, i8*, i32, i8*) +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, ptr, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare void @__tgt_register_requires(i64) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i64 @__tgt_mapper_num_components(i8*) +; OPTIMISTIC-NEXT: declare i64 @__tgt_mapper_num_components(ptr) ; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64, i8*) +; OPTIMISTIC-NEXT: declare void @__tgt_push_mapper_component(ptr, ptr, ptr, i64, i64, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_allow_completion_event(%struct.ident_t* nocapture nofree readonly, i32, i8*) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_task_allow_completion_event(ptr nocapture nofree readonly, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_task_reduction_get_th_data(i32, ptr, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_init(i32, i32, i8*) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_task_reduction_init(i32, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) +; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_task_reduction_modifier_init(ptr, i32, i32, i32, ptr) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) +; OPTIMISTIC-NEXT: declare void @__kmpc_proxy_task_completed_ooo(ptr) ; OPTIMISTIC: ; Function Attrs: cold convergent noinline nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_barrier_simple_spmd(ptr nocapture nofree readonly, i32) !llvm.module.flags = !{!0} diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll index e7b81a9d8f176e..cc4a52847dffa5 100644 --- a/llvm/test/Transforms/OpenMP/always_inline_device.ll +++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes ; RUN: opt < %s -S -passes=openmp-opt -openmp-opt-inline-device | FileCheck %s -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode = weak constant i8 1 -@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata" @G = external global i8 ; Function Attrs: convergent norecurse nounwind @@ -13,7 +13,7 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 { ; CHECK: Function Attrs: convergent norecurse nounwind ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_c0934fc2_foo_l4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -23,14 +23,14 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 { ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: store i8 1, i8* @G, align 1 -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; CHECK-NEXT: store i8 1, ptr @G, align 1 +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -40,9 +40,9 @@ user_code.entry: ; preds = %entry ; TODO: This is not perfect. We should rather go for SPMD mode and tell the runtime ; to only spawn a single thread. Further, we then should not guard any code. %isSPMD = call i8 @__kmpc_is_spmd_exec_mode() - store i8 %isSPMD, i8* @G + store i8 %isSPMD, ptr @G call void @bar() #2 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -51,9 +51,9 @@ worker.exit: ; preds = %entry declare i8 @__kmpc_is_spmd_exec_mode() -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) +declare i32 @__kmpc_target_init(ptr, i8, i1) -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) ; Function Attrs: convergent nounwind define hidden void @bar() #1 { @@ -76,7 +76,7 @@ attributes #2 = { convergent } !llvm.ident = !{!7} !0 = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0} -!1 = !{void ()* @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1} +!1 = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll index 86772ae8bd54db..9c0416af359d4d 100644 --- a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll +++ b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll @@ -13,10 +13,10 @@ define linkonce_odr hidden i8 @_ZStplIdESt7complexIT_ERKS2_S4_() local_unnamed_a ret i8 undef } -declare void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, float**, float**, i64, float**, float*, float*, i64) local_unnamed_addr +declare void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr declare dso_local fastcc void @__kmpc_for_static_init_8u() unnamed_addr !nvvm.annotations = !{!0} -!0 = !{void (i64, i64, i64, float**, float**, i64, float**, float*, float*, i64)* @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148, !"kernel", i32 1} +!0 = !{ptr @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148, !"kernel", i32 1} diff --git a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll index 05dd79410cbc1f..e684f65cf2e0c0 100644 --- a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll +++ b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll @@ -4,13 +4,13 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" -%"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, i8* } +%"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, ptr } %"struct.(anonymous namespace)::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 } -%"struct.(anonymous namespace)::ThreadStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", %"struct.(anonymous namespace)::ThreadStateTy"* } +%"struct.(anonymous namespace)::ThreadStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", ptr } @_ZN12_GLOBAL__N_19TeamStateE = internal addrspace(3) global %"struct.(anonymous namespace)::TeamStateTy" undef, align 8 -define internal i8* @__kmpc_alloc_shared() { +define internal ptr @__kmpc_alloc_shared() { lor.lhs.false.i.i: br i1 false, label %_ZN4ompx5state8lookup32ENS0_9ValueKindEb.exit.i, label %if.then.i44.i.i @@ -18,11 +18,10 @@ if.then.i44.i.i: ; preds = %lor.lhs.false.i.i br label %_ZN4ompx5state8lookup32ENS0_9ValueKindEb.exit.i _ZN4ompx5state8lookup32ENS0_9ValueKindEb.exit.i: ; preds = %if.then.i44.i.i, %lor.lhs.false.i.i - %.pn.i45.i.i = phi i8* [ null, %if.then.i44.i.i ], [ addrspacecast (i8 addrspace(3)* bitcast (%"struct.(anonymous namespace)::TeamStateTy" addrspace(3)* @_ZN12_GLOBAL__N_19TeamStateE to i8 addrspace(3)*) to i8*), %lor.lhs.false.i.i ] - %retval.0.in.i.i.i = getelementptr inbounds i8, i8* %.pn.i45.i.i, i64 4 - %retval.0.i46.i.i = bitcast i8* %retval.0.in.i.i.i to i32* - %0 = load i32, i32* %retval.0.i46.i.i, align 4 - ret i8* null + %.pn.i45.i.i = phi ptr [ null, %if.then.i44.i.i ], [ addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE to ptr), %lor.lhs.false.i.i ] + %retval.0.in.i.i.i = getelementptr inbounds i8, ptr %.pn.i45.i.i, i64 4 + %0 = load i32, ptr %retval.0.in.i.i.i, align 4 + ret ptr null } !llvm.module.flags = !{!0, !1} diff --git a/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll b/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll index efc21c5169f956..fa3fd730eb495c 100644 --- a/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll +++ b/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll @@ -4,49 +4,49 @@ ; A | ; \-------------/ -%"struct.TS" = type { i32, %"struct.TS"* } +%"struct.TS" = type { i32, ptr } define weak amdgpu_kernel void @k() { ; CHECK-LABEL: define {{[^@]+}}@k() { ; CHECK-NEXT: BB1: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: BB2: -; CHECK-NEXT: [[DOTPRE158_I:%.*]] = phi %struct.TS* [ null, [[BB1:%.*]] ], [ [[PRE2:%.*]], [[BB6:%.*]] ] +; CHECK-NEXT: [[DOTPRE158_I:%.*]] = phi ptr [ null, [[BB1:%.*]] ], [ [[PRE2:%.*]], [[BB6:%.*]] ] ; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB3:%.*]] ; CHECK: BB3: ; CHECK-NEXT: br label [[BB4]] ; CHECK: BB4: -; CHECK-NEXT: [[PRE1:%.*]] = phi %struct.TS* [ [[DOTPRE158_I]], [[BB3]] ], [ null, [[BB2]] ] +; CHECK-NEXT: [[PRE1:%.*]] = phi ptr [ [[DOTPRE158_I]], [[BB3]] ], [ null, [[BB2]] ] ; CHECK-NEXT: br i1 false, label [[BB6]], label [[BB5:%.*]] ; CHECK: BB5: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_TS:%.*]], %struct.TS* [[PRE1]], i64 0, i32 1 -; CHECK-NEXT: [[Q3:%.*]] = load %struct.TS*, %struct.TS** [[GEP]], align 8 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_TS:%.*]], ptr [[PRE1]], i64 0, i32 1 +; CHECK-NEXT: [[Q3:%.*]] = load ptr, ptr [[GEP]], align 8 ; CHECK-NEXT: br label [[BB6]] ; CHECK: BB6: -; CHECK-NEXT: [[PRE2]] = phi %struct.TS* [ null, [[BB4]] ], [ [[Q3]], [[BB5]] ] +; CHECK-NEXT: [[PRE2]] = phi ptr [ null, [[BB4]] ], [ [[Q3]], [[BB5]] ] ; CHECK-NEXT: br label [[BB2]] ; BB1: br label %BB2 BB2: - %.pre158.i = phi %"struct.TS"* [ null, %BB1 ], [ %pre2, %BB6 ] + %.pre158.i = phi ptr [ null, %BB1 ], [ %pre2, %BB6 ] br i1 false, label %BB4, label %BB3 BB3: br label %BB4 BB4: - %pre1 = phi %"struct.TS"* [ %.pre158.i, %BB3 ], [ null, %BB2 ] + %pre1 = phi ptr [ %.pre158.i, %BB3 ], [ null, %BB2 ] br i1 false, label %BB6, label %BB5 BB5: - %gep = getelementptr inbounds %"struct.TS", %"struct.TS"* %pre1, i64 0, i32 1 - %q3 = load %"struct.TS"*, %"struct.TS"** %gep, align 8 + %gep = getelementptr inbounds %"struct.TS", ptr %pre1, i64 0, i32 1 + %q3 = load ptr, ptr %gep, align 8 br label %BB6 BB6: - %pre2 = phi %"struct.TS"* [ null, %BB4 ], [ %q3, %BB5 ] + %pre2 = phi ptr [ null, %BB4 ], [ %q3, %BB5 ] br label %BB2 } diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll index d7f92d872de32b..29387947a4f5cb 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -119,10 +119,10 @@ ;; { weak_callee_empty(); } ;; } -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1 @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1 @@ -131,25 +131,25 @@ @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1 @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1 -@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @G = external global i32, align 4 -@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 -@llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, i8* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata" +@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8 +@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata" define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -157,16 +157,16 @@ worker.exit: ; preds = %entry } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } -define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @no_parallel_region_in_here() #7 call void @unknown_no_openmp() #8 ret void @@ -174,72 +174,70 @@ entry: define hidden void @no_parallel_region_in_here() #1 { entry: - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = call i32 @__kmpc_single(%struct.ident_t* @2, i32 %0) + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + %1 = call i32 @__kmpc_single(ptr @2, i32 %0) %2 = icmp ne i32 %1, 0 br i1 %2, label %omp_if.then, label %omp_if.end omp_if.then: ; preds = %entry - store i32 0, i32* @G, align 4 - call void @__kmpc_end_single(%struct.ident_t* @2, i32 %0) + store i32 0, ptr @G, align 4 + call void @__kmpc_end_single(ptr @2, i32 %0) br label %omp_if.end omp_if.end: ; preds = %omp_if.then, %entry - call void @__kmpc_barrier(%struct.ident_t* @3, i32 %0) + call void @__kmpc_barrier(ptr @3, i32 %0) ret void } declare void @unknown_no_openmp() #2 -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 +declare i32 @__kmpc_global_thread_num(ptr) #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - %captured_vars_addrs1 = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs1 = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %2, i64 0) + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs, i64 0) call void @no_parallel_region_in_here() #7 - %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %3, i64 0) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1, i64 0) ret void } -define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -251,25 +249,25 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) #3 ret void } -declare void @__kmpc_get_shared_variables(i8***) +declare void @__kmpc_get_shared_variables(ptr) -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) -define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -281,12 +279,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -294,55 +292,53 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedur entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 call void @simple_state_machine_interprocedural_before() #7 call void @no_parallel_region_in_here() #7 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 0) + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 0) call void @simple_state_machine_interprocedural_after() #7 ret void } define hidden void @simple_state_machine_interprocedural_before() #1 { entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** %1, i64 0) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0) ret void } -define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -352,21 +348,20 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr) #3 ret void } define hidden void @simple_state_machine_interprocedural_after() #1 { entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** %1, i64 0) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0) ret void } @@ -374,46 +369,44 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - %captured_vars_addrs1 = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 0) + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs1 = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 0) %call = call i32 @unknown() #7 - %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** %3, i64 0) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1, i64 0) ret void } -define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -423,23 +416,23 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr) #3 ret void } declare i32 @unknown() #4 -define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -449,12 +442,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__8(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__8(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -462,46 +455,44 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_att entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - %captured_vars_addrs1 = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** %2, i64 0) + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs1 = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs, i64 0) call void @unknown_no_openmp() #8 - %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** %3, i64 0) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1, i64 0) ret void } -define internal void @__omp_outlined__10(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -511,21 +502,21 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__10(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__10(ptr %.addr1, ptr %.zero.addr) #3 ret void } -define internal void @__omp_outlined__11(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -535,12 +526,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__11(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__11(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -548,47 +539,45 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77() #0 entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__12(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - %captured_vars_addrs1 = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs1 = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** %2, i64 0) + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs, i64 0) call void @unknown_pure() #9 - %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** %3, i64 0) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1, i64 0) ret void } -define internal void @__omp_outlined__13(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -598,23 +587,23 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__13(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__13(ptr %.addr1, ptr %.zero.addr) #3 ret void } declare void @unknown_pure() #5 -define internal void @__omp_outlined__14(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -624,12 +613,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__14(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__14(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -637,29 +626,29 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedur entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__15(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - %call = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #7 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 + %call = call i32 @omp_get_thread_num() #7 call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7 ret void } @@ -667,8 +656,8 @@ entry: define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - %0 = load i32, i32* %a.addr, align 4 + store i32 %a, ptr %a.addr, align 4 + %0 = load i32, ptr %a.addr, align 4 %cmp = icmp eq i32 %0, 0 br i1 %cmp, label %if.then, label %if.end @@ -676,7 +665,7 @@ if.then: ; preds = %entry br label %return if.end: ; preds = %entry - %1 = load i32, i32* %a.addr, align 4 + %1 = load i32, ptr %a.addr, align 4 %sub = sub nsw i32 %1, 1 call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7 call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7 @@ -692,28 +681,28 @@ define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112( entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__16(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @weak_callee_empty() #7 ret void } @@ -723,18 +712,18 @@ entry: ret void } -declare i32 @__kmpc_single(%struct.ident_t*, i32) #6 +declare i32 @__kmpc_single(ptr, i32) #6 -declare void @__kmpc_end_single(%struct.ident_t*, i32) #6 +declare void @__kmpc_end_single(ptr, i32) #6 -declare void @__kmpc_barrier(%struct.ident_t*, i32) #6 +declare void @__kmpc_barrier(ptr, i32) #6 -define internal void @__omp_outlined__17(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -744,21 +733,21 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__17(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__17(ptr %.addr1, ptr %.zero.addr) #3 ret void } -define internal void @__omp_outlined__18(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -768,30 +757,29 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__18(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__18(ptr %.addr1, ptr %.zero.addr) #3 ret void } define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 { entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** %1, i64 0) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0) ret void } -define internal void @__omp_outlined__19(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -801,12 +789,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__19(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__19(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -833,14 +821,14 @@ attributes #9 = { convergent nounwind readonly willreturn } !5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} !6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} !7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -!8 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -!9 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -!10 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -!11 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -!12 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -!13 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -!14 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -!15 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +!8 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +!9 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +!10 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +!11 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +!12 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +!13 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +!14 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +!15 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} !16 = !{i32 1, !"wchar_size", i32 4} !17 = !{i32 7, !"openmp", i32 50} !18 = !{i32 7, !"openmp-device", i32 50} @@ -850,29 +838,29 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void ; ; ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; AMDGPU-NEXT: ret i32 0 ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] ; AMDGPU-NEXT: ret void @@ -882,16 +870,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; AMDGPU: omp_if.then: -; AMDGPU-NEXT: store i32 0, i32* @G, align 4 -; AMDGPU-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: br label [[OMP_IF_END]] ; AMDGPU: omp_if.end: -; AMDGPU-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -899,16 +887,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; AMDGPU: omp_if.then: -; AMDGPU-NEXT: store i32 0, i32* @G, align 4 -; AMDGPU-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[OMP_IF_END]] ; AMDGPU: omp_if.end: -; AMDGPU-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) ; AMDGPU-NEXT: ret void ; ; @@ -916,10 +904,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -929,19 +917,19 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU: worker_state_machine.finished: ; AMDGPU-NEXT: ret void ; AMDGPU: worker_state_machine.is_active.check: ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU: worker_state_machine.parallel_region.check: -; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__2_wrapper.ID ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; AMDGPU: worker_state_machine.parallel_region.execute: ; AMDGPU-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) @@ -957,16 +945,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -974,30 +962,28 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP3]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10:[0-9]+]] ; AMDGPU-NEXT: ret void ; @@ -1009,18 +995,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1032,9 +1018,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1042,10 +1028,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1055,25 +1041,25 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU: worker_state_machine.finished: ; AMDGPU-NEXT: ret void ; AMDGPU: worker_state_machine.is_active.check: ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU: worker_state_machine.parallel_region.check: -; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; AMDGPU: worker_state_machine.parallel_region.execute: ; AMDGPU-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] ; AMDGPU: worker_state_machine.parallel_region.check1: -; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] ; AMDGPU: worker_state_machine.parallel_region.execute2: ; AMDGPU-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) @@ -1089,16 +1075,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1106,19 +1092,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; @@ -1127,10 +1112,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1138,19 +1122,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1162,9 +1145,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1172,10 +1155,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1183,10 +1165,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1194,10 +1175,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1207,25 +1188,25 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU: worker_state_machine.finished: ; AMDGPU-NEXT: ret void ; AMDGPU: worker_state_machine.is_active.check: ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU: worker_state_machine.parallel_region.check: -; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; AMDGPU: worker_state_machine.parallel_region.execute: ; AMDGPU-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] ; AMDGPU: worker_state_machine.parallel_region.check1: -; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__8_wrapper.ID ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; AMDGPU: worker_state_machine.parallel_region.execute2: ; AMDGPU-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) @@ -1237,16 +1218,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1254,29 +1235,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* @__omp_outlined__8_wrapper.ID, i8** [[TMP3]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1288,18 +1267,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1311,9 +1290,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1321,10 +1300,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1334,19 +1313,19 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU: worker_state_machine.finished: ; AMDGPU-NEXT: ret void ; AMDGPU: worker_state_machine.is_active.check: ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU: worker_state_machine.parallel_region.check: -; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__10_wrapper.ID ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; AMDGPU: worker_state_machine.parallel_region.execute: ; AMDGPU-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) @@ -1362,16 +1341,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1379,29 +1358,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* @__omp_outlined__10_wrapper.ID, i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* @__omp_outlined__11_wrapper.ID, i8** [[TMP3]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1413,18 +1390,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1436,9 +1413,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1446,10 +1423,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1459,19 +1436,19 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU: worker_state_machine.finished: ; AMDGPU-NEXT: ret void ; AMDGPU: worker_state_machine.is_active.check: ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU: worker_state_machine.parallel_region.check: -; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__13_wrapper.ID ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; AMDGPU: worker_state_machine.parallel_region.execute: ; AMDGPU-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) @@ -1487,16 +1464,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1504,29 +1481,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* @__omp_outlined__13_wrapper.ID, i8** [[TMP2]], i64 0) -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* @__omp_outlined__14_wrapper.ID, i8** [[TMP3]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1538,18 +1513,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1561,9 +1536,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1571,10 +1546,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1584,19 +1559,19 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU: worker_state_machine.finished: ; AMDGPU-NEXT: ret void ; AMDGPU: worker_state_machine.is_active.check: ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU: worker_state_machine.parallel_region.check: -; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; AMDGPU: worker_state_machine.parallel_region.execute: ; AMDGPU-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) @@ -1608,15 +1583,15 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1624,11 +1599,11 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] ; AMDGPU-NEXT: ret void ; @@ -1638,14 +1613,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; AMDGPU: if.then: ; AMDGPU-NEXT: br label [[RETURN:%.*]] ; AMDGPU: if.end: -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] @@ -1659,14 +1634,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; AMDGPU: if.then: ; AMDGPU-NEXT: br label [[RETURN:%.*]] ; AMDGPU: if.end: -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] @@ -1679,10 +1654,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1692,12 +1667,12 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU: worker_state_machine.finished: ; AMDGPU-NEXT: ret void @@ -1710,15 +1685,15 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1726,10 +1701,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; @@ -1743,10 +1718,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1758,18 +1733,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1781,9 +1756,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1791,10 +1766,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1802,19 +1776,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1826,9 +1799,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1838,29 +1811,29 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void ; ; ; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; NVPTX-NEXT: ret i32 0 ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] ; NVPTX-NEXT: ret void @@ -1870,16 +1843,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; NVPTX: omp_if.then: -; NVPTX-NEXT: store i32 0, i32* @G, align 4 -; NVPTX-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 0, ptr @G, align 4 +; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: br label [[OMP_IF_END]] ; NVPTX: omp_if.end: -; NVPTX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -1887,16 +1860,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; NVPTX: omp_if.then: -; NVPTX-NEXT: store i32 0, i32* @G, align 4 -; NVPTX-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: store i32 0, ptr @G, align 4 +; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[OMP_IF_END]] ; NVPTX: omp_if.end: -; NVPTX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) ; NVPTX-NEXT: ret void ; ; @@ -1904,10 +1877,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 ; NVPTX-SAME: () #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -1917,18 +1890,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX: worker_state_machine.finished: ; NVPTX-NEXT: ret void ; NVPTX: worker_state_machine.is_active.check: ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX: worker_state_machine.parallel_region.check: -; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__2_wrapper.ID ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; NVPTX: worker_state_machine.parallel_region.execute: ; NVPTX-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) @@ -1944,16 +1917,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1961,30 +1934,28 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP3]], i64 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10:[0-9]+]] ; NVPTX-NEXT: ret void ; @@ -1996,18 +1967,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2019,9 +1990,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2029,10 +2000,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 ; NVPTX-SAME: () #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2042,24 +2013,24 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX: worker_state_machine.finished: ; NVPTX-NEXT: ret void ; NVPTX: worker_state_machine.is_active.check: ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX: worker_state_machine.parallel_region.check: -; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; NVPTX: worker_state_machine.parallel_region.execute: ; NVPTX-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] ; NVPTX: worker_state_machine.parallel_region.check1: -; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] ; NVPTX: worker_state_machine.parallel_region.execute2: ; NVPTX-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) @@ -2075,16 +2046,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2092,19 +2063,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; NVPTX-NEXT: ret void ; @@ -2113,10 +2083,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -2124,19 +2093,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2148,9 +2116,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2158,10 +2126,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -2169,10 +2136,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -2180,10 +2146,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 ; NVPTX-SAME: () #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2193,24 +2159,24 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX: worker_state_machine.finished: ; NVPTX-NEXT: ret void ; NVPTX: worker_state_machine.is_active.check: ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX: worker_state_machine.parallel_region.check: -; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; NVPTX: worker_state_machine.parallel_region.execute: ; NVPTX-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] ; NVPTX: worker_state_machine.parallel_region.check1: -; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__8_wrapper.ID ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; NVPTX: worker_state_machine.parallel_region.execute2: ; NVPTX-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) @@ -2222,16 +2188,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2239,29 +2205,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* @__omp_outlined__8_wrapper.ID, i8** [[TMP3]], i64 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2273,18 +2237,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2296,9 +2260,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2306,10 +2270,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 ; NVPTX-SAME: () #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2319,18 +2283,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX: worker_state_machine.finished: ; NVPTX-NEXT: ret void ; NVPTX: worker_state_machine.is_active.check: ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX: worker_state_machine.parallel_region.check: -; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__10_wrapper.ID ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; NVPTX: worker_state_machine.parallel_region.execute: ; NVPTX-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) @@ -2346,16 +2310,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2363,29 +2327,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* @__omp_outlined__10_wrapper.ID, i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* @__omp_outlined__11_wrapper.ID, i8** [[TMP3]], i64 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2397,18 +2359,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2420,9 +2382,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2430,10 +2392,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 ; NVPTX-SAME: () #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2443,18 +2405,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX: worker_state_machine.finished: ; NVPTX-NEXT: ret void ; NVPTX: worker_state_machine.is_active.check: ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX: worker_state_machine.parallel_region.check: -; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__13_wrapper.ID ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; NVPTX: worker_state_machine.parallel_region.execute: ; NVPTX-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) @@ -2470,16 +2432,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2487,29 +2449,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* @__omp_outlined__13_wrapper.ID, i8** [[TMP2]], i64 0) -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* @__omp_outlined__14_wrapper.ID, i8** [[TMP3]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2521,18 +2481,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2544,9 +2504,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2554,10 +2514,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; NVPTX-SAME: () #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2567,18 +2527,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX: worker_state_machine.finished: ; NVPTX-NEXT: ret void ; NVPTX: worker_state_machine.is_active.check: ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX: worker_state_machine.parallel_region.check: -; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; NVPTX: worker_state_machine.parallel_region.execute: ; NVPTX-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) @@ -2590,15 +2550,15 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2606,11 +2566,11 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]] ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] ; NVPTX-NEXT: ret void ; @@ -2620,14 +2580,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; NVPTX: if.then: ; NVPTX-NEXT: br label [[RETURN:%.*]] ; NVPTX: if.end: -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] @@ -2641,14 +2601,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; NVPTX: if.then: ; NVPTX-NEXT: br label [[RETURN:%.*]] ; NVPTX: if.end: -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] @@ -2661,10 +2621,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 ; NVPTX-SAME: () #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2674,11 +2634,11 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX: worker_state_machine.finished: ; NVPTX-NEXT: ret void @@ -2691,15 +2651,15 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2707,10 +2667,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR8]] ; NVPTX-NEXT: ret void ; @@ -2724,10 +2684,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2739,18 +2699,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2762,9 +2722,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2772,10 +2732,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -2783,19 +2742,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2807,9 +2765,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2819,29 +2777,29 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; AMDGPU-DISABLED-NEXT: ret i32 0 ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void @@ -2851,16 +2809,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; AMDGPU-DISABLED: omp_if.then: -; AMDGPU-DISABLED-NEXT: store i32 0, i32* @G, align 4 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: br label [[OMP_IF_END]] ; AMDGPU-DISABLED: omp_if.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -2868,16 +2826,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; AMDGPU-DISABLED: omp_if.then: -; AMDGPU-DISABLED-NEXT: store i32 0, i32* @G, align 4 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[OMP_IF_END]] ; AMDGPU-DISABLED: omp_if.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -2887,14 +2845,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -2902,30 +2860,28 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -2937,18 +2893,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -2960,9 +2916,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -2972,14 +2928,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -2987,19 +2943,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3008,10 +2963,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3019,19 +2973,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3043,9 +2996,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3053,10 +3006,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3064,10 +3016,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3077,14 +3028,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3092,29 +3043,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3126,18 +3075,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3149,9 +3098,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3161,14 +3110,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3176,29 +3125,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3210,18 +3157,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3233,9 +3180,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3245,14 +3192,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3260,29 +3207,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0) -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3294,18 +3239,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3317,9 +3262,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3329,13 +3274,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3343,11 +3288,11 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3357,14 +3302,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; AMDGPU-DISABLED: if.then: ; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]] ; AMDGPU-DISABLED: if.end: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] @@ -3378,14 +3323,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; AMDGPU-DISABLED: if.then: ; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]] ; AMDGPU-DISABLED: if.end: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] @@ -3400,13 +3345,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3414,10 +3359,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3431,10 +3376,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3446,18 +3391,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3469,9 +3414,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3479,10 +3424,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3490,19 +3434,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3514,9 +3457,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3526,29 +3469,29 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; NVPTX-DISABLED-NEXT: ret i32 0 ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void @@ -3558,16 +3501,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; NVPTX-DISABLED: omp_if.then: -; NVPTX-DISABLED-NEXT: store i32 0, i32* @G, align 4 -; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: store i32 0, ptr @G, align 4 +; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: br label [[OMP_IF_END]] ; NVPTX-DISABLED: omp_if.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3575,16 +3518,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here ; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; NVPTX-DISABLED: omp_if.then: -; NVPTX-DISABLED-NEXT: store i32 0, i32* @G, align 4 -; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: store i32 0, ptr @G, align 4 +; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[OMP_IF_END]] ; NVPTX-DISABLED: omp_if.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3594,14 +3537,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3609,30 +3552,28 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -3644,18 +3585,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -3667,9 +3608,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3679,14 +3620,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3694,19 +3635,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -3715,10 +3655,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3726,19 +3665,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -3750,9 +3688,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3760,10 +3698,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3771,10 +3708,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3784,14 +3720,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3799,29 +3735,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -3833,18 +3767,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -3856,9 +3790,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3868,14 +3802,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3883,29 +3817,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -3917,18 +3849,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -3940,9 +3872,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3952,14 +3884,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3967,29 +3899,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0) -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -4001,18 +3931,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -4024,9 +3954,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -4036,13 +3966,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4050,11 +3980,11 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -4064,14 +3994,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; NVPTX-DISABLED: if.then: ; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]] ; NVPTX-DISABLED: if.end: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] @@ -4085,14 +4015,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; NVPTX-DISABLED: if.then: ; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]] ; NVPTX-DISABLED: if.end: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] @@ -4107,13 +4037,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4121,10 +4051,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -4138,10 +4068,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -4153,18 +4083,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -4176,9 +4106,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -4186,10 +4116,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -4197,19 +4126,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -4221,8 +4149,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll index 4fb9a4d55dcdc1..6dae1732b8c8a1 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll @@ -119,10 +119,10 @@ ;; { weak_callee_empty(); } ;; } -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1 @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1 @@ -131,25 +131,25 @@ @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1 @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1 -@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @G = external global i32, align 4 -@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 -@llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, i8* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata" +@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8 +@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata" define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -157,14 +157,14 @@ worker.exit: ; preds = %entry } ; Make it a declaration so we will *not* apply custom state machine rewriting and wait for LTO. -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1); +declare i32 @__kmpc_target_init(ptr, i8, i1); -define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @no_parallel_region_in_here() #7 call void @unknown_no_openmp() #8 ret void @@ -172,72 +172,70 @@ entry: define hidden void @no_parallel_region_in_here() #1 { entry: - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = call i32 @__kmpc_single(%struct.ident_t* @2, i32 %0) + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + %1 = call i32 @__kmpc_single(ptr @2, i32 %0) %2 = icmp ne i32 %1, 0 br i1 %2, label %omp_if.then, label %omp_if.end omp_if.then: ; preds = %entry - store i32 0, i32* @G, align 4 - call void @__kmpc_end_single(%struct.ident_t* @2, i32 %0) + store i32 0, ptr @G, align 4 + call void @__kmpc_end_single(ptr @2, i32 %0) br label %omp_if.end omp_if.end: ; preds = %omp_if.then, %entry - call void @__kmpc_barrier(%struct.ident_t* @3, i32 %0) + call void @__kmpc_barrier(ptr @3, i32 %0) ret void } declare void @unknown_no_openmp() #2 -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 +declare i32 @__kmpc_global_thread_num(ptr) #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - %captured_vars_addrs1 = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs1 = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %2, i64 0) + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs, i64 0) call void @no_parallel_region_in_here() #7 - %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %3, i64 0) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1, i64 0) ret void } -define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -249,25 +247,25 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) #3 ret void } -declare void @__kmpc_get_shared_variables(i8***) +declare void @__kmpc_get_shared_variables(ptr) -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) -define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -279,12 +277,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -292,55 +290,53 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedur entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 call void @simple_state_machine_interprocedural_before() #7 call void @no_parallel_region_in_here() #7 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 0) + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 0) call void @simple_state_machine_interprocedural_after() #7 ret void } define hidden void @simple_state_machine_interprocedural_before() #1 { entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** %1, i64 0) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0) ret void } -define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -350,21 +346,20 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr) #3 ret void } define hidden void @simple_state_machine_interprocedural_after() #1 { entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** %1, i64 0) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0) ret void } @@ -372,46 +367,44 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - %captured_vars_addrs1 = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 0) + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs1 = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 0) %call = call i32 @unknown() #7 - %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** %3, i64 0) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1, i64 0) ret void } -define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -421,23 +414,23 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr) #3 ret void } declare i32 @unknown() #4 -define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -447,12 +440,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__8(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__8(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -460,46 +453,44 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_att entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - %captured_vars_addrs1 = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** %2, i64 0) + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs1 = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs, i64 0) call void @unknown_no_openmp() #8 - %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** %3, i64 0) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1, i64 0) ret void } -define internal void @__omp_outlined__10(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -509,21 +500,21 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__10(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__10(ptr %.addr1, ptr %.zero.addr) #3 ret void } -define internal void @__omp_outlined__11(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -533,12 +524,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__11(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__11(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -546,47 +537,45 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77() #0 entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__12(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - %captured_vars_addrs1 = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs1 = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown_no_openmp() #8 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** %2, i64 0) + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs, i64 0) call void @unknown_pure() #9 - %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** %3, i64 0) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1, i64 0) ret void } -define internal void @__omp_outlined__13(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -596,23 +585,23 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__13(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__13(ptr %.addr1, ptr %.zero.addr) #3 ret void } declare void @unknown_pure() #5 -define internal void @__omp_outlined__14(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p1() #7 ret void } @@ -622,12 +611,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__14(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__14(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -635,29 +624,29 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedur entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__15(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - %call = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #7 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 + %call = call i32 @omp_get_thread_num() #7 call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7 ret void } @@ -665,8 +654,8 @@ entry: define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - %0 = load i32, i32* %a.addr, align 4 + store i32 %a, ptr %a.addr, align 4 + %0 = load i32, ptr %a.addr, align 4 %cmp = icmp eq i32 %0, 0 br i1 %cmp, label %if.then, label %if.end @@ -674,7 +663,7 @@ if.then: ; preds = %entry br label %return if.end: ; preds = %entry - %1 = load i32, i32* %a.addr, align 4 + %1 = load i32, ptr %a.addr, align 4 %sub = sub nsw i32 %1, 1 call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7 call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7 @@ -690,28 +679,28 @@ define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112( entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3 + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__16(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @weak_callee_empty() #7 ret void } @@ -721,18 +710,18 @@ entry: ret void } -declare i32 @__kmpc_single(%struct.ident_t*, i32) #6 +declare i32 @__kmpc_single(ptr, i32) #6 -declare void @__kmpc_end_single(%struct.ident_t*, i32) #6 +declare void @__kmpc_end_single(ptr, i32) #6 -declare void @__kmpc_barrier(%struct.ident_t*, i32) #6 +declare void @__kmpc_barrier(ptr, i32) #6 -define internal void @__omp_outlined__17(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -742,21 +731,21 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__17(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__17(ptr %.addr1, ptr %.zero.addr) #3 ret void } -define internal void @__omp_outlined__18(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -766,30 +755,29 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__18(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__18(ptr %.addr1, ptr %.zero.addr) #3 ret void } define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 { entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** %1, i64 0) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0) ret void } -define internal void @__omp_outlined__19(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @p0() #7 ret void } @@ -799,12 +787,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__19(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__19(ptr %.addr1, ptr %.zero.addr) #3 ret void } @@ -831,14 +819,14 @@ attributes #9 = { convergent nounwind readonly willreturn } !5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} !6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} !7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} -!8 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} -!9 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} -!10 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} -!11 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} -!12 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} -!13 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} -!14 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} -!15 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +!8 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +!9 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +!10 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +!11 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +!12 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +!13 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +!14 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +!15 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} !16 = !{i32 1, !"wchar_size", i32 4} !17 = !{i32 7, !"openmp", i32 50} !18 = !{i32 7, !"openmp-device", i32 50} @@ -848,13 +836,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -862,10 +850,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] ; AMDGPU-NEXT: ret void @@ -875,16 +863,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; AMDGPU: omp_if.then: -; AMDGPU-NEXT: store i32 0, i32* @G, align 4 -; AMDGPU-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: br label [[OMP_IF_END]] ; AMDGPU: omp_if.end: -; AMDGPU-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -892,16 +880,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; AMDGPU: omp_if.then: -; AMDGPU-NEXT: store i32 0, i32* @G, align 4 -; AMDGPU-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[OMP_IF_END]] ; AMDGPU: omp_if.end: -; AMDGPU-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) ; AMDGPU-NEXT: ret void ; ; @@ -911,14 +899,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -926,30 +914,28 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10:[0-9]+]] ; AMDGPU-NEXT: ret void ; @@ -961,18 +947,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -984,9 +970,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -996,14 +982,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1011,19 +997,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; @@ -1032,10 +1017,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1043,19 +1027,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1067,9 +1050,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1077,10 +1060,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1088,10 +1070,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1101,14 +1082,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1116,29 +1097,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1150,18 +1129,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1173,9 +1152,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1185,14 +1164,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1200,29 +1179,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1234,18 +1211,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1257,9 +1234,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1269,14 +1246,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1284,29 +1261,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0) -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1318,18 +1293,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p1() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1341,9 +1316,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1353,13 +1328,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1367,11 +1342,11 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] ; AMDGPU-NEXT: ret void ; @@ -1381,14 +1356,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; AMDGPU: if.then: ; AMDGPU-NEXT: br label [[RETURN:%.*]] ; AMDGPU: if.end: -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] @@ -1402,14 +1377,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; AMDGPU: if.then: ; AMDGPU-NEXT: br label [[RETURN:%.*]] ; AMDGPU: if.end: -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] @@ -1424,13 +1399,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1438,10 +1413,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; @@ -1455,10 +1430,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1470,18 +1445,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1493,9 +1468,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1503,10 +1478,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1514,19 +1488,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @p0() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; @@ -1538,9 +1511,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1550,13 +1523,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1564,10 +1537,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] ; NVPTX-NEXT: ret void @@ -1577,16 +1550,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; NVPTX: omp_if.then: -; NVPTX-NEXT: store i32 0, i32* @G, align 4 -; NVPTX-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 0, ptr @G, align 4 +; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: br label [[OMP_IF_END]] ; NVPTX: omp_if.end: -; NVPTX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -1594,16 +1567,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; NVPTX: omp_if.then: -; NVPTX-NEXT: store i32 0, i32* @G, align 4 -; NVPTX-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: store i32 0, ptr @G, align 4 +; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[OMP_IF_END]] ; NVPTX: omp_if.end: -; NVPTX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) ; NVPTX-NEXT: ret void ; ; @@ -1613,14 +1586,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1628,30 +1601,28 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10:[0-9]+]] ; NVPTX-NEXT: ret void ; @@ -1663,18 +1634,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -1686,9 +1657,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -1698,14 +1669,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1713,19 +1684,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; NVPTX-NEXT: ret void ; @@ -1734,10 +1704,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -1745,19 +1714,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -1769,9 +1737,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -1779,10 +1747,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -1790,10 +1757,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -1803,14 +1769,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1818,29 +1784,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -1852,18 +1816,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -1875,9 +1839,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -1887,14 +1851,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1902,29 +1866,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -1936,18 +1898,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -1959,9 +1921,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -1971,14 +1933,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1986,29 +1948,27 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0) -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2020,18 +1980,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p1() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2043,9 +2003,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2055,13 +2015,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2069,11 +2029,11 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]] ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] ; NVPTX-NEXT: ret void ; @@ -2083,14 +2043,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; NVPTX: if.then: ; NVPTX-NEXT: br label [[RETURN:%.*]] ; NVPTX: if.end: -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] @@ -2104,14 +2064,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; NVPTX: if.then: ; NVPTX-NEXT: br label [[RETURN:%.*]] ; NVPTX: if.end: -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] @@ -2126,13 +2086,13 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2140,10 +2100,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR8]] ; NVPTX-NEXT: ret void ; @@ -2157,10 +2117,10 @@ attributes #9 = { convergent nounwind readonly willreturn } ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2172,18 +2132,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2195,9 +2155,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2205,10 +2165,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -2216,19 +2175,18 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @p0() #[[ATTR10]] ; NVPTX-NEXT: ret void ; @@ -2240,8 +2198,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll index d4e33c87aaf2f7..e1b7b37f3bcb66 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll @@ -35,32 +35,32 @@ target triple = "nvptx64" ;; } ;; } -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @2 = private unnamed_addr constant [82 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_fallback;11;1;;\00", align 1 -@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([82 x i8], [82 x i8]* @2, i32 0, i32 0) }, align 8 +@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8 @4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1 -@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([114 x i8], [114 x i8]* @4, i32 0, i32 0) }, align 8 +@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8 @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1 @6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1 -@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([116 x i8], [116 x i8]* @6, i32 0, i32 0) }, align 8 +@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8 @8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1 -@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([85 x i8], [85 x i8]* @8, i32 0, i32 0) }, align 8 +@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8 @10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1 -@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([117 x i8], [117 x i8]* @10, i32 0, i32 0) }, align 8 +@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8 @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1 @12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1 -@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([73 x i8], [73 x i8]* @12, i32 0, i32 0) }, align 8 +@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8 @G = external global i32 -@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, i8* @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata" ; Function Attrs: convergent norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 { entry: - %captured_vars_addrs.i.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18 + %captured_vars_addrs.i.i = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18 %exec_user_code = icmp eq i32 %0, -1, !dbg !18 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18 @@ -68,21 +68,19 @@ common.ret: ; preds = %entry, %user_code.e ret void, !dbg !19 user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @3) #3 + %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @3) #3 call void @unknown() #6, !dbg !20 - %2 = bitcast [0 x i8*]* %captured_vars_addrs.i.i to i8* - call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3 - %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i.i, i64 0, i64 0, !dbg !23 - call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23 - call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26 + call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3 + %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 + call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23 + call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26 call void @unknown() #6, !dbg !27 - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28 + call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28 br label %common.ret } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } @@ -92,23 +90,22 @@ declare void @unknown() local_unnamed_addr #1 ; Function Attrs: nounwind define hidden void @known() local_unnamed_addr #2 !dbg !29 { entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @13) - %1 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0, !dbg !30 - call void @__kmpc_parallel_51(%struct.ident_t* nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** nonnull %1, i64 0) #3, !dbg !30 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @13) + call void @__kmpc_parallel_51(ptr nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3, !dbg !30 ret void, !dbg !31 } ; Function Attrs: nounwind -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3 +declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr +declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr ; Function Attrs: norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 { entry: - %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33 + %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33 %exec_user_code = icmp eq i32 %0, -1, !dbg !33 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33 @@ -116,29 +113,27 @@ common.ret: ; preds = %entry, %user_code.e ret void, !dbg !34 user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @9) #3 - %2 = bitcast [0 x i8*]* %captured_vars_addrs.i2.i to i8* - call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3 - %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i2.i, i64 0, i64 0, !dbg !35 - call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !35 - call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !39 - call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3 - %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3 - call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %5, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !40 - call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !42 - call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3 - %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3 - call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43 - call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45 + %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @9) #3 + call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3 + %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 + call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !35 + call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !39 + call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3 + %3 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 + call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !40 + call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !42 + call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3 + %4 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 + call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43 + call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45 call void @no_openmp() call void @no_parallelism() - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46 + call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46 br label %common.ret } ; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__2(i32* noalias nocapture nofree readnone %.global_tid., i32* noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 { +define internal void @__omp_outlined__2(ptr noalias nocapture nofree readnone %.global_tid., ptr noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 { entry: call void @unknown() #6, !dbg !48 ret void, !dbg !49 @@ -147,21 +142,21 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 { entry: - %global_args = alloca i8**, align 8 - call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3, !dbg !51 + %global_args = alloca ptr, align 8 + call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3, !dbg !51 call void @unknown() #6, !dbg !52 ret void, !dbg !51 } -declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr +declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr ; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5 ; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5 declare void @no_openmp() #7 declare void @no_parallelism() #8 @@ -187,8 +182,8 @@ attributes #8 = { "llvm.assume"="omp_no_parallelism" } !2 = !{} !3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1} !4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0} -!5 = !{void ()* @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1} -!6 = !{void ()* @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1} +!5 = !{ptr @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1} +!6 = !{ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1} !7 = !{i32 7, !"Dwarf Version", i32 2} !8 = !{i32 2, !"Debug Info Version", i32 3} !9 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/Transforms/OpenMP/dead_use.ll b/llvm/test/Transforms/OpenMP/dead_use.ll index c116b620b8f078..b3f5194b10fc36 100644 --- a/llvm/test/Transforms/OpenMP/dead_use.ll +++ b/llvm/test/Transforms/OpenMP/dead_use.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; RUN: opt -S -passes=openmp-opt-cgscc < %s | FileCheck %s -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8 ; Function Attrs: nounwind uwtable define dso_local i32 @b() #0 { @@ -11,12 +11,12 @@ define dso_local i32 @b() #0 { ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a() -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 ; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = alloca i32, align 4 %2 = call i32 @a() - %3 = load i32, i32* %1, align 4 + %3 = load i32, ptr %1, align 4 ret i32 %3 } @@ -26,36 +26,36 @@ define internal i32 @a() #0 { ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @b() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB0:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB0:[0-9]+]], i32 0, ptr @.omp_outlined.) +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 ; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = alloca i32, align 4 %2 = call i32 @b() - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) - %3 = load i32, i32* %1, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @0, i32 0, ptr @.omp_outlined.) + %3 = load i32, ptr %1, align 4 ret i32 %3 } ; Function Attrs: norecurse nounwind uwtable -define internal void @.omp_outlined.(i32* noalias %0, i32* noalias %1) #1 { +define internal void @.omp_outlined.(ptr noalias %0, ptr noalias %1) #1 { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK-SAME: (i32* noalias [[TMP0:%.*]], i32* noalias [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[TMP3:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[TMP4:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: store i32* [[TMP0]], i32** [[TMP3]], align 8, !tbaa [[TBAA2:![0-9]+]] -; CHECK-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8, !tbaa [[TBAA2]] +; CHECK-SAME: (ptr noalias [[TMP0:%.*]], ptr noalias [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[TMP3:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8, !tbaa [[TBAA2:![0-9]+]] +; CHECK-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8, !tbaa [[TBAA2]] ; CHECK-NEXT: ret void ; - %3 = alloca i32*, align 8 - %4 = alloca i32*, align 8 - store i32* %0, i32** %3, align 8, !tbaa !2 - store i32* %1, i32** %4, align 8, !tbaa !2 + %3 = alloca ptr, align 8 + %4 = alloca ptr, align 8 + store ptr %0, ptr %3, align 8, !tbaa !2 + store ptr %1, ptr %4, align 8, !tbaa !2 ret void } ; Function Attrs: nounwind -declare !callback !6 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #2 +declare !callback !6 void @__kmpc_fork_call(ptr, i32, ptr, ...) #2 attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll index 070b027ae4ccb8..df5c7737ee39c1 100644 --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -2,33 +2,33 @@ ; RUN: opt -passes=openmp-opt-cgscc -S < %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } -@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 -@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, ptr @.str0 }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str1 }, align 8 +@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str2 }, align 8 @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 ; UTC_ARGS: --disable -; CHECK-DAG: @0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 -; CHECK-DAG: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 -; CHECK-DAG: @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 +; CHECK-DAG: @0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, ptr @.str0 }, align 8 +; CHECK-DAG: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str1 }, align 8 +; CHECK-DAG: @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str2 }, align 8 ; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 ; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 ; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 ; UTC_ARGS: --enable -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +declare i32 @__kmpc_global_thread_num(ptr) declare void @useI32(i32) define void @external(i1 %c) { ; CHECK-LABEL: define {{[^@]+}}@external ; CHECK-SAME: (i1 [[C:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB0:[0-9]+]]) +; CHECK-NEXT: [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB0:[0-9]+]]) ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]] ; CHECK: t: ; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]]) @@ -46,17 +46,17 @@ define void @external(i1 %c) { entry: br i1 %c, label %t, label %e t: - %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %c0 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0) call void @internal(i32 %c0, i32 %c0) call void @useI32(i32 %c0) br label %m e: - %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %c1 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0) call void @internal(i32 %c1, i32 %c1) call void @useI32(i32 %c1) br label %m m: - %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %c2 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0) call void @internal(i32 0, i32 %c2) call void @useI32(i32 %c2) ret void @@ -80,20 +80,20 @@ define internal void @internal(i32 %not_gtid, i32 %gtid) { ; CHECK-NEXT: ret void ; entry: - %cc = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %cc = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0) %c = icmp eq i32 %cc, %gtid br i1 %c, label %t, label %e t: - %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %c0 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0) call void @useI32(i32 %c0) call void @external(i1 %c) br label %m e: - %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %c1 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0) call void @useI32(i32 %c1) br label %m m: - %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %c2 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0) call void @useI32(i32 %c2) ret void } @@ -102,7 +102,7 @@ m: define void @local_and_global_gtid_calls() { ; CHECK-LABEL: define {{[^@]+}}@local_and_global_gtid_calls() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) @@ -114,15 +114,15 @@ define void @local_and_global_gtid_calls() { ; entry: %.kmpc_loc.addr = alloca %struct.ident_t, align 8 - %tid0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) - %tid1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - %tid2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + %tid0 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr) + %tid1 = call i32 @__kmpc_global_thread_num(ptr @1) + %tid2 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr) call void @useI32(i32 %tid0) call void @useI32(i32 %tid1) call void @useI32(i32 %tid2) - %tid3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) - %tid4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %tid5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + %tid3 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr) + %tid4 = call i32 @__kmpc_global_thread_num(ptr @2) + %tid5 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr) call void @useI32(i32 %tid3) call void @useI32(i32 %tid4) call void @useI32(i32 %tid5) @@ -132,7 +132,7 @@ entry: define void @local_gtid_calls_only() { ; CHECK-LABEL: define {{[^@]+}}@local_gtid_calls_only() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR1:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR2:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR3:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 @@ -148,15 +148,15 @@ entry: %.kmpc_loc.addr1 = alloca %struct.ident_t, align 8 %.kmpc_loc.addr2 = alloca %struct.ident_t, align 8 %.kmpc_loc.addr3 = alloca %struct.ident_t, align 8 - %tid0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr1) - %tid1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr2) - %tid2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr3) + %tid0 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr1) + %tid1 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr2) + %tid2 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr3) call void @useI32(i32 %tid0) call void @useI32(i32 %tid1) call void @useI32(i32 %tid2) - %tid3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr1) - %tid4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr2) - %tid5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr3) + %tid3 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr1) + %tid4 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr2) + %tid5 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr3) call void @useI32(i32 %tid3) call void @useI32(i32 %tid4) call void @useI32(i32 %tid5) diff --git a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll index a97f4da66f04f8..1b0f00603ff451 100644 --- a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll @@ -4,24 +4,24 @@ source_filename = "deduplication_remarks.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, ptr @.str0 }, align 8 @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 ; CHECK: remark: deduplication_remarks.c:7:10: OpenMP runtime call __kmpc_global_thread_num deduplicated ; CHECK: remark: deduplication_remarks.c:9:10: OpenMP runtime call __kmpc_global_thread_num deduplicated define dso_local void @deduplicate() local_unnamed_addr !dbg !14 { - %1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !21 + %1 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0), !dbg !21 call void @useI32(i32 %1), !dbg !23 - %2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !24 + %2 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0), !dbg !24 call void @useI32(i32 %2), !dbg !25 - %3 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !26 + %3 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0), !dbg !26 call void @useI32(i32 %3), !dbg !27 ret void, !dbg !28 } -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +declare i32 @__kmpc_global_thread_num(ptr) declare !dbg !4 void @useI32(i32) local_unnamed_addr diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll index cf83e7900a1748..bfb9e3bc457a03 100644 --- a/llvm/test/Transforms/OpenMP/deduplication_target.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll @@ -4,13 +4,13 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 -@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 +@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, ptr @0 }, align 8 @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode = weak constant i8 0 -@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode], section "llvm.metadata" declare void @use(i32) @@ -18,38 +18,38 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_50_a3e09bf8_foo_l2 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 false) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2) + %1 = call i32 @__kmpc_global_thread_num(ptr @2) + %2 = call i32 @__kmpc_global_thread_num(ptr @2) + call void @__kmpc_target_deinit(ptr @1, i8 2) ret void worker.exit: ; preds = %entry ret void } -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) +declare i32 @__kmpc_target_init(ptr, i8, i1) -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #1 +declare i32 @__kmpc_global_thread_num(ptr) #1 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } attributes #1 = { nounwind } @@ -59,7 +59,7 @@ attributes #1 = { nounwind } !llvm.module.flags = !{!2, !3, !4} !0 = !{i32 0, i32 80, i32 -1545561096, !"foo", i32 2, i32 0} -!1 = !{void ()* @__omp_offloading_50_a3e09bf8_foo_l2, !"kernel", i32 1} +!1 = !{ptr @__omp_offloading_50_a3e09bf8_foo_l2, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll index 552f941ba90d08..5376a88d554732 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -2,7 +2,7 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @kernel0_exec_mode = weak constant i8 1 @@ -13,12 +13,12 @@ target triple = "nvptx64" ; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -28,15 +28,15 @@ define weak void @kernel0() #0 { ; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]] ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } @@ -45,7 +45,7 @@ define weak void @kernel0() #0 { define weak void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -53,13 +53,13 @@ define weak void @kernel1() #0 { ; CHECK-NEXT: ret void ; CHECK: main.thread.user_code: ; CHECK-NEXT: call void @helper1() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @helper1() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } @@ -69,25 +69,24 @@ define weak void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) #[[ATTR1]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR1]] ; CHECK-NEXT: call void @helper0() #[[ATTR1]] ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) +; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true) %exec_user_code = icmp eq i32 %i, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -95,13 +94,12 @@ common.ret: ret void user_code.entry: - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** + %0 = call i32 @__kmpc_global_thread_num(ptr null) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) + call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0) + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } @@ -114,18 +112,18 @@ define internal void @helper0() { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: -; CHECK-NEXT: store i32 666, i32* @G, align 4 +; CHECK-NEXT: store i32 666, ptr @G, align 4 ; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: ret void ; %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() - store i32 %threadLimit, i32* @G + store i32 %threadLimit, ptr @G ret void } @@ -157,24 +155,24 @@ define internal void @helper2() { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: -; CHECK-NEXT: store i32 666, i32* @G, align 4 +; CHECK-NEXT: store i32 666, ptr @G, align 4 ; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: ret void ; %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() - store i32 %threadLimit, i32* @G + store i32 %threadLimit, ptr @G ret void } -define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) { +; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; @@ -202,10 +200,10 @@ define internal i32 @__kmpc_get_hardware_num_threads_in_block() { ret i32 %ret } declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy() -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext) #1 -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8) #1 -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext) #1 +declare void @__kmpc_target_deinit(ptr nocapture readnone, i8) #1 +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) +declare i32 @__kmpc_global_thread_num(ptr) !llvm.module.flags = !{!0, !1} @@ -215,9 +213,9 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"} !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{void ()* @kernel0, !"kernel", i32 1} -!3 = !{void ()* @kernel1, !"kernel", i32 1} -!4 = !{void ()* @kernel2, !"kernel", i32 1} +!2 = !{ptr @kernel0, !"kernel", i32 1} +!3 = !{ptr @kernel1, !"kernel", i32 1} +!4 = !{ptr @kernel2, !"kernel", i32 1} ; ;. ; CHECK: attributes #[[ATTR0]] = { "omp_target_num_teams"="777" "omp_target_thread_limit"="666" } @@ -227,7 +225,7 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"} ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel0, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{void ()* @kernel1, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{void ()* @kernel2, !"kernel", i32 1} +; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1} +; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1} +; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1} ;. diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll index a4ccea5a4e69b1..591892f03755a3 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll @@ -2,7 +2,7 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @kernel0_exec_mode = weak constant i8 1 @@ -16,18 +16,18 @@ target triple = "nvptx64" define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) ; CHECK-NEXT: call void @helper0() ; CHECK-NEXT: call void @helper1() ; CHECK-NEXT: call void @helper2() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 true) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true) + call void @__kmpc_target_deinit(ptr null, i1 true) ret void } @@ -36,14 +36,14 @@ define weak void @kernel0() #0 { define weak void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) ; CHECK-NEXT: call void @helper1() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) call void @helper1() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) + call void @__kmpc_target_deinit(ptr null, i1 false) ret void } @@ -52,18 +52,18 @@ define weak void @kernel1() #0 { define weak void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false) ; CHECK-NEXT: call void @helper0() ; CHECK-NEXT: call void @helper1() ; CHECK-NEXT: call void @helper2() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) + call void @__kmpc_target_deinit(ptr null, i1 false) ret void } @@ -71,11 +71,11 @@ define internal void @helper0() { ; CHECK-LABEL: define {{[^@]+}}@helper0 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]] -; CHECK-NEXT: store i32 [[THREADLIMIT]], i32* @G, align 4 +; CHECK-NEXT: store i32 [[THREADLIMIT]], ptr @G, align 4 ; CHECK-NEXT: ret void ; %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() - store i32 %threadLimit, i32* @G + store i32 %threadLimit, ptr @G ret void } @@ -103,17 +103,17 @@ f: define internal void @helper2() { ; CHECK-LABEL: define {{[^@]+}}@helper2() { ; CHECK-NEXT: [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]] -; CHECK-NEXT: store i32 [[THREADLIMIT]], i32* @G, align 4 +; CHECK-NEXT: store i32 [[THREADLIMIT]], ptr @G, align 4 ; CHECK-NEXT: ret void ; %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() - store i32 %threadLimit, i32* @G + store i32 %threadLimit, ptr @G ret void } declare i32 @__kmpc_get_hardware_num_threads_in_block() -declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext) #1 -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext) #1 +declare i32 @__kmpc_target_init(ptr, i1 zeroext, i1 zeroext) #1 +declare void @__kmpc_target_deinit(ptr nocapture readnone, i1 zeroext) #1 !llvm.module.flags = !{!0, !1} @@ -123,9 +123,9 @@ attributes #0 = { optnone noinline "omp_target_thread_limit"="666" "omp_target_n !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{void ()* @kernel0, !"kernel", i32 1} -!3 = !{void ()* @kernel1, !"kernel", i32 1} -!4 = !{void ()* @kernel2, !"kernel", i32 1} +!2 = !{ptr @kernel0, !"kernel", i32 1} +!3 = !{ptr @kernel1, !"kernel", i32 1} +!4 = !{ptr @kernel2, !"kernel", i32 1} ; ;. ; CHECK: attributes #[[ATTR0]] = { noinline optnone "omp_target_num_teams"="777" "omp_target_thread_limit"="666" } @@ -133,7 +133,7 @@ attributes #0 = { optnone noinline "omp_target_thread_limit"="666" "omp_target_n ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel0, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{void ()* @kernel1, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{void ()* @kernel2, !"kernel", i32 1} +; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1} +; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1} +; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1} ;. diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll index 8cfc97aff70931..1c304ce743eda1 100644 --- a/llvm/test/Transforms/OpenMP/global_constructor.ll +++ b/llvm/test/Transforms/OpenMP/global_constructor.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @_ZL6Device = internal global double 0.000000e+00, align 8 @__omp_offloading_fd02_85283c04_main_l11_exec_mode = weak constant i8 0 -define weak void @__omp_offloading_fd02_85283c04_main_l11(double* nonnull align 8 dereferenceable(8) %X) local_unnamed_addr { +define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr { entry: - %0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false) #0 + %0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #0 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -18,31 +18,31 @@ common.ret: ret void user_code.entry: - %1 = load double, double* @_ZL6Device, align 8, !tbaa !11 + %1 = load double, ptr @_ZL6Device, align 8, !tbaa !11 %2 = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #0 %3 = icmp eq i32 %2, 0 br i1 %3, label %region.guarded, label %region.barrier region.guarded: - store double %1, double* %X, align 8, !tbaa !11 + store double %1, ptr %X, align 8, !tbaa !11 br label %region.barrier region.barrier: - tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @1, i32 %2) - tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2) #0 + tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @1, i32 %2) + tail call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #0 br label %common.ret } -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr +declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr +declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() { entry: %call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1 %call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1 %div = fdiv double %call.i, %call.i2 - store double %div, double* @_ZL6Device, align 8, !tbaa !11 + store double %div, ptr @_ZL6Device, align 8, !tbaa !11 ret void } @@ -50,7 +50,7 @@ declare double @__nv_log(double) declare i32 @__kmpc_get_hardware_thread_id_in_block() -declare void @__kmpc_barrier_simple_spmd(%struct.ident_t*, i32) +declare void @__kmpc_barrier_simple_spmd(ptr, i32) attributes #0 = { nounwind } attributes #1 = { convergent nounwind } @@ -63,8 +63,8 @@ attributes #1 = { convergent nounwind } !0 = !{i32 0, i32 64770, i32 -2060960764, !"__omp_offloading__fd02_85283c04_Device_l6_ctor", i32 6, i32 1} !1 = !{i32 0, i32 64770, i32 -2060960764, !"main", i32 11, i32 2} !2 = !{i32 1, !"_ZL6Device", i32 0, i32 0} -!3 = !{void ()* @__omp_offloading__fd02_85283c04_Device_l6_ctor, !"kernel", i32 1} -!4 = !{void (double*)* @__omp_offloading_fd02_85283c04_main_l11, !"kernel", i32 1} +!3 = !{ptr @__omp_offloading__fd02_85283c04_Device_l6_ctor, !"kernel", i32 1} +!4 = !{ptr @__omp_offloading_fd02_85283c04_main_l11, !"kernel", i32 1} !5 = !{i32 1, !"wchar_size", i32 4} !6 = !{i32 7, !"openmp", i32 50} !7 = !{i32 7, !"openmp-device", i32 50} @@ -76,24 +76,24 @@ attributes #1 = { convergent nounwind } !13 = !{!"omnipotent char", !14, i64 0} !14 = !{!"Simple C++ TBAA"} ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11 -; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr { +; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = load double, double* @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR2]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: -; CHECK-NEXT: store double [[TMP1]], double* [[X]], align 8, !tbaa [[TBAA11]] +; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]] ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]] -; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR2]] +; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]] +; CHECK-NEXT: tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR2]] ; CHECK-NEXT: br label [[COMMON_RET]] ; ; @@ -104,6 +104,6 @@ attributes #1 = { convergent nounwind } ; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]] ; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]] -; CHECK-NEXT: store double [[DIV]], double* @_ZL6Device, align 8, !tbaa [[TBAA11]] +; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll index 854cf85b551b1c..701c09ff05c1e3 100644 --- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll @@ -7,35 +7,33 @@ target triple = "nvptx64" ; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override. ; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } -@S = external local_unnamed_addr global i8* +@S = external local_unnamed_addr global ptr define void @foo() { entry: - %c = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true) - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10 - %x_on_stack = bitcast i8* %0 to i32* - %1 = bitcast i32* %x_on_stack to i8* - call void @share(i8* %1), !dbg !10 - call void @__kmpc_free_shared(i8* %0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) + %c = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true) + %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10 + call void @share(ptr %0), !dbg !10 + call void @__kmpc_free_shared(ptr %0) + call void @__kmpc_target_deinit(ptr null, i1 false) ret void } -define internal void @share(i8* %x) { +define internal void @share(ptr %x) { entry: - store i8* %x, i8** @S + store ptr %x, ptr @S ret void } -declare i8* @__kmpc_alloc_shared(i64) +declare ptr @__kmpc_alloc_shared(i64) -declare void @__kmpc_free_shared(i8* nocapture) +declare void @__kmpc_free_shared(ptr nocapture) -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1); +declare i32 @__kmpc_target_init(ptr, i1, i1); -declare void @__kmpc_target_deinit(%struct.ident_t*, i1) +declare void @__kmpc_target_deinit(ptr, i1) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} @@ -48,7 +46,7 @@ declare void @__kmpc_target_deinit(%struct.ident_t*, i1) !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} -!7 = !{void ()* @foo, !"kernel", i32 1} +!7 = !{ptr @foo, !"kernel", i32 1} !8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !9 = !DISubroutineType(types: !2) !10 = !DILocation(line: 5, column: 7, scope: !8) diff --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll index aa578a2bd71179..5efce91387e9eb 100644 --- a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll +++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll @@ -16,13 +16,13 @@ define void @non_kernel() { } ; Needed to trigger the openmp-opt pass -declare dso_local void @__kmpc_kernel_prepare_parallel(i8*) +declare dso_local void @__kmpc_kernel_prepare_parallel(ptr) !llvm.module.flags = !{!4} !nvvm.annotations = !{!2, !0, !1, !3, !1, !2} -!0 = !{void ()* @kernel1, !"kernel", i32 1} -!1 = !{void ()* @non_kernel, !"non_kernel", i32 1} +!0 = !{ptr @kernel1, !"kernel", i32 1} +!1 = !{ptr @non_kernel, !"non_kernel", i32 1} !2 = !{null, !"align", i32 1} -!3 = !{void ()* @kernel2, !"kernel", i32 1} +!3 = !{ptr @kernel2, !"kernel", i32 1} !4 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll index 4bedf541ca7968..29f969d23e1ff6 100644 --- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll +++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -26,74 +26,72 @@ ; CHECK-DAG: @__omp_outlined__1_wrapper.ID = private constant i8 undef ; CHECK-DAG: @__omp_outlined__2_wrapper.ID = private constant i8 undef -; CHECK-DAG: icmp eq void (i16, i32)* %worker.work_fn.addr_cast, bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*) -; CHECK-DAG: icmp eq void (i16, i32)* %worker.work_fn.addr_cast, bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) +; CHECK-DAG: icmp eq ptr %worker.work_fn.addr_cast, @__omp_outlined__1_wrapper.ID +; CHECK-DAG: icmp eq ptr %worker.work_fn.addr_cast, @__omp_outlined__2_wrapper.ID -; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** %{{.*}}, i64 0) -; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** %{{.*}}, i64 0) -; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %{{.*}}, i64 0) +; CHECK-DAG: call void @__kmpc_parallel_51(ptr @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr %{{.*}}, i64 0) +; CHECK-DAG: call void @__kmpc_parallel_51(ptr @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr %{{.*}}, i64 0) +; CHECK-DAG: call void @__kmpc_parallel_51(ptr @2, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %{{.*}}, i64 0) -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @__omp_offloading_10301_87b2c_foo_l7_exec_mode = weak constant i8 1 -@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 -@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_10301_87b2c_foo_l7_exec_mode], section "llvm.metadata" +@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_10301_87b2c_foo_l7_exec_mode], section "llvm.metadata" define weak void @__omp_offloading_10301_87b2c_foo_l7() { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + store i32 0, ptr %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 %1, i32* %.threadid_temp., align 4 - call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 %1, ptr %.threadid_temp., align 4 + call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } declare void @unknown() -define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - %captured_vars_addrs = alloca [0 x i8*], align 8 - %captured_vars_addrs1 = alloca [0 x i8*], align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 - %0 = load i32*, i32** %.global_tid..addr, align 8 - %1 = load i32, i32* %0, align 4 - %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %2, i64 0) + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %captured_vars_addrs1 = alloca [0 x ptr], align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 + %0 = load ptr, ptr %.global_tid..addr, align 8 + %1 = load i32, ptr %0, align 4 + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0) call void @bar() call void @unknown() - %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %3, i64 0) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs1, i64 0) ret void } -define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 ret void } @@ -102,34 +100,33 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr) + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr) ret void } -declare void @__kmpc_get_shared_variables(i8***) +declare void @__kmpc_get_shared_variables(ptr) -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) define hidden void @bar() { entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %1, i64 0) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0) ret void } -define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 ret void } @@ -138,25 +135,25 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr) + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) ret void } -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +declare i32 @__kmpc_global_thread_num(ptr) -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) -define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 ret void } @@ -165,12 +162,12 @@ entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 0, i32* %.zero.addr, align 4 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) + %global_args = alloca ptr, align 8 + store i32 0, ptr %.zero.addr, align 4 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) ret void } @@ -179,6 +176,6 @@ entry: !llvm.module.flags = !{!2, !3} !0 = !{i32 0, i32 66305, i32 555956, !"foo", i32 7, i32 0} -!1 = !{void ()* @__omp_offloading_10301_87b2c_foo_l7, !"kernel", i32 1} +!1 = !{ptr @__omp_offloading_10301_87b2c_foo_l7, !"kernel", i32 1} !2 = !{i32 7, !"openmp", i32 50} !3 = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/icv_remarks.ll b/llvm/test/Transforms/OpenMP/icv_remarks.ll index f69f2abe5cc0c9..f76d4876a55dec 100644 --- a/llvm/test/Transforms/OpenMP/icv_remarks.ll +++ b/llvm/test/Transforms/OpenMP/icv_remarks.ll @@ -5,10 +5,10 @@ source_filename = "icv_remarks.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8 @1 = private unnamed_addr constant [26 x i8] c";icv_remarks.c;foo;18;1;;\00", align 1 ; CHECK-DAG: remark: icv_remarks.c:12:0: OpenMP ICV nthreads Value: IMPLEMENTATION_DEFINED @@ -17,16 +17,15 @@ target triple = "x86_64-unknown-linux-gnu" define dso_local void @foo(i32 %a) local_unnamed_addr #0 !dbg !17 { entry: %.kmpc_loc.addr = alloca %struct.ident_t, align 8 - %0 = bitcast %struct.ident_t* %.kmpc_loc.addr to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(24) %0, i8* nonnull align 8 dereferenceable(24) bitcast (%struct.ident_t* @0 to i8*), i64 16, i1 false) + call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(24) %.kmpc_loc.addr, ptr nonnull align 8 dereferenceable(24) @0, i64 16, i1 false) call void @llvm.dbg.value(metadata i32 %a, metadata !19, metadata !DIExpression()), !dbg !21 tail call void @omp_set_num_threads(i32 %a) #1, !dbg !22 %call = tail call i32 @omp_get_max_threads() #1, !dbg !23 call void @llvm.dbg.value(metadata i32 %call, metadata !20, metadata !DIExpression()), !dbg !21 tail call void @use(i32 %call) #1, !dbg !24 - %1 = getelementptr inbounds %struct.ident_t, %struct.ident_t* %.kmpc_loc.addr, i64 0, i32 4, !dbg !25 - store i8* getelementptr inbounds ([26 x i8], [26 x i8]* @1, i64 0, i64 0), i8** %1, align 8, !dbg !25, !tbaa !26 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull %.kmpc_loc.addr, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) #1, !dbg !25 + %0 = getelementptr inbounds %struct.ident_t, ptr %.kmpc_loc.addr, i64 0, i32 4, !dbg !25 + store ptr @1, ptr %0, align 8, !dbg !25, !tbaa !26 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull %.kmpc_loc.addr, i32 0, ptr @.omp_outlined.) #1, !dbg !25 ret void, !dbg !32 } @@ -39,12 +38,12 @@ declare !dbg !12 dso_local void @use(i32) local_unnamed_addr #2 ; CHECK-DAG: remark: icv_remarks.c:18:0: OpenMP ICV nthreads Value: IMPLEMENTATION_DEFINED ; CHECK-DAG: remark: icv_remarks.c:18:0: OpenMP ICV active_levels Value: 0 ; CHECK-DAG: remark: icv_remarks.c:18:0: OpenMP ICV cancel Value: 0 -define internal void @.omp_outlined.(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid.) #3 !dbg !33 { +define internal void @.omp_outlined.(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid.) #3 !dbg !33 { entry: - call void @llvm.dbg.value(metadata i32* %.global_tid., metadata !41, metadata !DIExpression()), !dbg !43 - call void @llvm.dbg.value(metadata i32* %.bound_tid., metadata !42, metadata !DIExpression()), !dbg !43 - call void @llvm.dbg.value(metadata i32* undef, metadata !44, metadata !DIExpression()) #1, !dbg !50 - call void @llvm.dbg.value(metadata i32* undef, metadata !47, metadata !DIExpression()) #1, !dbg !50 + call void @llvm.dbg.value(metadata ptr %.global_tid., metadata !41, metadata !DIExpression()), !dbg !43 + call void @llvm.dbg.value(metadata ptr %.bound_tid., metadata !42, metadata !DIExpression()), !dbg !43 + call void @llvm.dbg.value(metadata ptr undef, metadata !44, metadata !DIExpression()) #1, !dbg !50 + call void @llvm.dbg.value(metadata ptr undef, metadata !47, metadata !DIExpression()) #1, !dbg !50 tail call void @omp_set_num_threads(i32 10) #1, !dbg !52 %call.i = tail call i32 @omp_get_max_threads() #1, !dbg !53 call void @llvm.dbg.value(metadata i32 %call.i, metadata !48, metadata !DIExpression()) #1, !dbg !54 @@ -52,9 +51,9 @@ entry: ret void, !dbg !56 } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #4 +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #4 -declare !callback !57 dso_local void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr #1 +declare !callback !57 dso_local void @__kmpc_fork_call(ptr, i32, ptr, ...) local_unnamed_addr #1 declare void @llvm.dbg.value(metadata, metadata, metadata) #5 diff --git a/llvm/test/Transforms/OpenMP/icv_tracking.ll b/llvm/test/Transforms/OpenMP/icv_tracking.ll index c51071742327e0..57c8eaee9cb64e 100644 --- a/llvm/test/Transforms/OpenMP/icv_tracking.ll +++ b/llvm/test/Transforms/OpenMP/icv_tracking.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8 ; doesn't modify any ICVs. define i32 @icv_free_use(i32 %0) { @@ -45,9 +45,9 @@ define i32 @ok_use_assume(i32 %0) { ret i32 %2 } -define void @indirect_call(void ()* %0) { +define void @indirect_call(ptr %0) { ; CHECK-LABEL: define {{[^@]+}}@indirect_call -; CHECK-SAME: (void ()* [[TMP0:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]]) { ; CHECK-NEXT: call void @omp_set_num_threads(i32 4) ; CHECK-NEXT: tail call void [[TMP0]]() ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @omp_get_max_threads() @@ -68,7 +68,7 @@ define dso_local i32 @foo(i32 %0, i32 %1) { ; CHECK-NEXT: tail call void @omp_set_num_threads(i32 [[TMP1]]) ; CHECK-NEXT: tail call void @use(i32 [[TMP1]]) ; CHECK-NEXT: tail call void @use(i32 [[TMP1]]) -; CHECK-NEXT: tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @[[GLOB0:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +; CHECK-NEXT: tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB0:[0-9]+]], i32 0, ptr @.omp_outlined.) ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @omp_get_max_threads() ; CHECK-NEXT: tail call void @use(i32 [[TMP3]]) ; CHECK-NEXT: ret i32 0 @@ -82,7 +82,7 @@ define dso_local i32 @foo(i32 %0, i32 %1) { %6 = tail call i32 @omp_get_max_threads() tail call void @use(i32 %4) tail call void @use(i32 %5) - tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) + tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.) %7 = tail call i32 @omp_get_max_threads() tail call void @use(i32 %7) ret i32 0 @@ -95,9 +95,9 @@ declare dso_local i32 @omp_get_max_threads() declare dso_local void @use(i32) declare dso_local void @no_openmp_use(i32) "no_openmp" -define internal void @.omp_outlined.(i32* %0, i32* %1) { +define internal void @.omp_outlined.(ptr %0, ptr %1) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @omp_get_max_threads() ; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @omp_get_max_threads() ; CHECK-NEXT: tail call void @use(i32 [[TMP4]]) @@ -115,7 +115,7 @@ define internal void @.omp_outlined.(i32* %0, i32* %1) { ret void } -declare !callback !0 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +declare !callback !0 void @__kmpc_fork_call(ptr, i32, ptr, ...) define dso_local i32 @bar(i32 %0, i32 %1) { ; CHECK-LABEL: define {{[^@]+}}@bar @@ -123,7 +123,7 @@ define dso_local i32 @bar(i32 %0, i32 %1) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP0]], i32 [[TMP1]] ; CHECK-NEXT: tail call void @omp_set_num_threads(i32 [[TMP4]]) -; CHECK-NEXT: tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @[[GLOB0]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB0]], i32 0, ptr @.omp_outlined..1) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @omp_get_max_threads() ; CHECK-NEXT: tail call void @use(i32 [[TMP5]]) ; CHECK-NEXT: ret i32 0 @@ -133,15 +133,15 @@ define dso_local i32 @bar(i32 %0, i32 %1) { ; FIXME: getters can be replaced with %4 tail call void @omp_set_num_threads(i32 %4) %5 = tail call i32 @omp_get_max_threads() - tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) + tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..1) %6 = tail call i32 @omp_get_max_threads() tail call void @use(i32 %6) ret i32 0 } -define internal void @.omp_outlined..1(i32* %0, i32* %1) { +define internal void @.omp_outlined..1(ptr %0, ptr %1) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @omp_get_max_threads() ; CHECK-NEXT: tail call void @use(i32 [[TMP3]]) ; CHECK-NEXT: tail call void @omp_set_num_threads(i32 10) @@ -166,7 +166,7 @@ define dso_local i32 @bar1(i32 %0, i32 %1) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP0]], i32 [[TMP1]] ; CHECK-NEXT: tail call void @omp_set_num_threads(i32 [[TMP4]]) -; CHECK-NEXT: tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @[[GLOB0]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB0]], i32 0, ptr @.omp_outlined..2) ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @omp_get_max_threads() ; CHECK-NEXT: tail call void @use(i32 [[TMP5]]) ; CHECK-NEXT: ret i32 0 @@ -175,15 +175,15 @@ define dso_local i32 @bar1(i32 %0, i32 %1) { %4 = select i1 %3, i32 %0, i32 %1 tail call void @omp_set_num_threads(i32 %4) %5 = tail call i32 @omp_get_max_threads() - tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) + tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..2) %6 = tail call i32 @omp_get_max_threads() tail call void @use(i32 %6) ret i32 0 } -define internal void @.omp_outlined..2(i32* %0, i32* %1) { +define internal void @.omp_outlined..2(ptr %0, ptr %1) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) { ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @omp_get_max_threads() ; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @icv_free_use(i32 [[TMP3]]) ; CHECK-NEXT: tail call void @omp_set_num_threads(i32 10) @@ -500,9 +500,9 @@ define void @test4(i1 %0) { ret void } -define void @test4_invoke(i1 %0) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @test4_invoke(i1 %0) personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: define {{[^@]+}}@test4_invoke -; CHECK-SAME: (i1 [[TMP0:%.*]]) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-SAME: (i1 [[TMP0:%.*]]) personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: call void @known_unique_icv(i1 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = invoke i32 @maybe_throw(i1 zeroext [[TMP0]]) ; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[EXC:%.*]] @@ -510,8 +510,8 @@ define void @test4_invoke(i1 %0) personality i8* bitcast (i32 (...)* @__gxx_pers ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i1 [[TMP0]], false ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] ; CHECK: exc: -; CHECK-NEXT: [[LP:%.*]] = landingpad { i8*, i32 } -; CHECK-NEXT: filter [0 x i8*] zeroinitializer +; CHECK-NEXT: [[LP:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: filter [0 x ptr] zeroinitializer ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @icv_free_use(i32 10) @@ -529,8 +529,8 @@ cont: br i1 %3, label %5, label %4 exc: - %lp = landingpad { i8*, i32 } - filter [0 x i8*] zeroinitializer + %lp = landingpad { ptr, i32 } + filter [0 x ptr] zeroinitializer unreachable 4: ; preds = %1 diff --git a/llvm/test/Transforms/OpenMP/icv_tracking_out_of_scope.ll b/llvm/test/Transforms/OpenMP/icv_tracking_out_of_scope.ll index b9889f9ba30173..ccdf492c303170 100644 --- a/llvm/test/Transforms/OpenMP/icv_tracking_out_of_scope.ll +++ b/llvm/test/Transforms/OpenMP/icv_tracking_out_of_scope.ll @@ -8,12 +8,12 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 define void @set() { ; CHECK-LABEL: define {{[^@]+}}@set() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @inofth, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @inofth, align 4 ; CHECK-NEXT: call void @omp_set_num_threads(i32 noundef [[TMP0]]) ; CHECK-NEXT: ret void ; entry: - %0 = load i32, i32* @inofth, align 4 + %0 = load i32, ptr @inofth, align 4 call void @omp_set_num_threads(i32 noundef %0) ret void } diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll index e71d99486751ed..3502ef79eaba27 100644 --- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll +++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll @@ -2,14 +2,14 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @is_spmd_exec_mode = weak constant i8 0 @will_be_spmd_exec_mode = weak constant i8 1 @non_spmd_exec_mode = weak constant i8 1 @will_not_be_spmd_exec_mode = weak constant i8 1 @G = external global i8 -@llvm.compiler.used = appending global [4 x i8*] [i8* @is_spmd_exec_mode, i8* @will_be_spmd_exec_mode, i8* @non_spmd_exec_mode, i8* @will_not_be_spmd_exec_mode ], section "llvm.metadata" +@llvm.compiler.used = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode ], section "llvm.metadata" ;. ; CHECK: @[[IS_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 @@ -17,45 +17,44 @@ target triple = "nvptx64" ; CHECK: @[[NON_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[WILL_NOT_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8 -; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [4 x i8*] [i8* @is_spmd_exec_mode, i8* @will_be_spmd_exec_mode, i8* @non_spmd_exec_mode, i8* @will_not_be_spmd_exec_mode], section "llvm.metadata" +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode], section "llvm.metadata" ;. define weak void @is_spmd() { ; CHECK-LABEL: define {{[^@]+}}@is_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: call void @is_spmd_helper1() ; CHECK-NEXT: call void @is_spmd_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) call void @is_spmd_helper1() call void @is_spmd_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) + call void @__kmpc_target_deinit(ptr null, i8 2) ret void } define weak void @will_be_spmd() { ; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: call void @is_spmd_helper2() -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) +; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true) %exec_user_code = icmp eq i32 %i, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -63,55 +62,54 @@ common.ret: ret void user_code.entry: - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** + %0 = call i32 @__kmpc_global_thread_num(ptr null) call void @is_spmd_helper2() - call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) + call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0) + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } define weak void @non_spmd() { ; CHECK-LABEL: define {{[^@]+}}@non_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) ; CHECK-NEXT: call void @is_generic_helper1() ; CHECK-NEXT: call void @is_generic_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @is_generic_helper1() call void @is_generic_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } define weak void @will_not_be_spmd() { ; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) ; CHECK-NEXT: call void @is_generic_helper1() ; CHECK-NEXT: call void @is_generic_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @is_generic_helper1() call void @is_generic_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } define internal void @is_spmd_helper1() { ; CHECK-LABEL: define {{[^@]+}}@is_spmd_helper1() { -; CHECK-NEXT: store i8 1, i8* @G, align 1 +; CHECK-NEXT: store i8 1, ptr @G, align 1 ; CHECK-NEXT: ret void ; %isSPMD = call i8 @__kmpc_is_spmd_exec_mode() - store i8 %isSPMD, i8* @G + store i8 %isSPMD, ptr @G ret void } @@ -135,11 +133,11 @@ f: define internal void @is_generic_helper1() { ; CHECK-LABEL: define {{[^@]+}}@is_generic_helper1() { -; CHECK-NEXT: store i8 0, i8* @G, align 1 +; CHECK-NEXT: store i8 0, ptr @G, align 1 ; CHECK-NEXT: ret void ; %isSPMD = call i8 @__kmpc_is_spmd_exec_mode() - store i8 %isSPMD, i8* @G + store i8 %isSPMD, ptr @G ret void } @@ -168,17 +166,17 @@ f: define internal void @is_mixed_helper() { ; CHECK-LABEL: define {{[^@]+}}@is_mixed_helper() { ; CHECK-NEXT: [[ISSPMD:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() -; CHECK-NEXT: store i8 [[ISSPMD]], i8* @G, align 1 +; CHECK-NEXT: store i8 [[ISSPMD]], ptr @G, align 1 ; CHECK-NEXT: ret void ; %isSPMD = call i8 @__kmpc_is_spmd_exec_mode() - store i8 %isSPMD, i8* @G + store i8 %isSPMD, ptr @G ret void } -define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) { +; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; @@ -198,10 +196,10 @@ entry: declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable" declare i8 @__kmpc_is_spmd_exec_mode() -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext) -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8) -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext) +declare void @__kmpc_target_deinit(ptr nocapture readnone, i8) +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) +declare i32 @__kmpc_global_thread_num(ptr) declare void @foo() declare void @bar() @@ -210,10 +208,10 @@ declare void @bar() !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{void ()* @is_spmd, !"kernel", i32 1} -!3 = !{void ()* @will_be_spmd, !"kernel", i32 1} -!4 = !{void ()* @non_spmd, !"kernel", i32 1} -!5 = !{void ()* @will_not_be_spmd, !"kernel", i32 1} +!2 = !{ptr @is_spmd, !"kernel", i32 1} +!3 = !{ptr @will_be_spmd, !"kernel", i32 1} +!4 = !{ptr @non_spmd, !"kernel", i32 1} +!5 = !{ptr @will_not_be_spmd, !"kernel", i32 1} ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_spmd_amenable" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline } @@ -221,8 +219,8 @@ declare void @bar() ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{void ()* @is_spmd, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{void ()* @will_be_spmd, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{void ()* @non_spmd, !"kernel", i32 1} -; CHECK: [[META5:![0-9]+]] = !{void ()* @will_not_be_spmd, !"kernel", i32 1} +; CHECK: [[META2:![0-9]+]] = !{ptr @is_spmd, !"kernel", i32 1} +; CHECK: [[META3:![0-9]+]] = !{ptr @will_be_spmd, !"kernel", i32 1} +; CHECK: [[META4:![0-9]+]] = !{ptr @non_spmd, !"kernel", i32 1} +; CHECK: [[META5:![0-9]+]] = !{ptr @will_not_be_spmd, !"kernel", i32 1} ;. diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index 908224082097de..65c4694384384c 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -3,13 +3,13 @@ ; target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 -@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8 +@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, ptr @.str }, align 8 @.gomp_critical_user_.reduction.var = common global [8 x i32] zeroinitializer -@2 = private unnamed_addr global %struct.ident_t { i32 0, i32 18, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@2 = private unnamed_addr global %struct.ident_t { i32 0, i32 18, i32 0, i32 0, ptr @.str }, align 8 ; void delete_parallel_0(void) { ; #pragma omp parallel @@ -26,39 +26,39 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 define void @delete_parallel_0() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_0() { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, ptr noundef @.omp_outlined.willreturn) ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_0() { ; CHECK1-NEXT: entry: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) +; CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, ptr noundef @.omp_outlined.willreturn) ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_0() { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, ptr noundef @.omp_outlined.willreturn) ; CHECK2-NEXT: ret void entry: - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn.0 to void (i32*, i32*, ...)*)) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn.1 to void (i32*, i32*, ...)*)) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn.2 to void (i32*, i32*, ...)*)) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.willreturn) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.willreturn.0) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.willreturn.1) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.willreturn.2) ret void } -define internal void @.omp_outlined.willreturn(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined.willreturn(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @unknown() #[[ATTR0]] ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn -; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: call void @unknown() #[[ATTR0]] ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: call void @unknown() #[[ATTR0]] ; CHECK2-NEXT: ret void @@ -67,20 +67,20 @@ entry: ret void } -define internal void @.omp_outlined.willreturn.0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) willreturn { +define internal void @.omp_outlined.willreturn.0(ptr noalias %.global_tid., ptr noalias %.bound_tid.) willreturn { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @readonly() ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 -; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: call void @readonly() #[[ATTR4:[0-9]+]] ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: call void @readonly() #[[ATTR4:[0-9]+]] ; CHECK2-NEXT: ret void @@ -89,20 +89,20 @@ entry: ret void } -define internal void @.omp_outlined.willreturn.1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined.willreturn.1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @readnone() #[[ATTR0]] ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1 -; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: call void @readnone() #[[ATTR0]] ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: call void @readnone() #[[ATTR0]] ; CHECK2-NEXT: ret void @@ -111,18 +111,18 @@ entry: ret void } -define internal void @.omp_outlined.willreturn.2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined.willreturn.2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2 -; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: ret void entry: @@ -144,45 +144,45 @@ entry: define void @delete_parallel_1() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_1() { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined.) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..0) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..1) ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_1() { ; CHECK1-NEXT: entry: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined.) +; CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..0) +; CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..1) ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_1() { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined.) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..0) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..1) ; CHECK2-NEXT: ret void entry: - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..0) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..1) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..2) ret void } -define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) { +; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: call void @unknown() ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: call void @unknown() ; CHECK2-NEXT: ret void @@ -191,20 +191,20 @@ entry: ret void } -define internal void @.omp_outlined..0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined..0(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..0 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @readonly() ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..0 -; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: call void @readonly() #[[ATTR4]] ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..0 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: call void @readonly() #[[ATTR4]] ; CHECK2-NEXT: ret void @@ -213,20 +213,20 @@ entry: ret void } -define internal void @.omp_outlined..1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined..1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @readnone() ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: call void @readnone() ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: call void @readnone() ; CHECK2-NEXT: ret void @@ -235,18 +235,18 @@ entry: ret void } -define internal void @.omp_outlined..2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined..2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: ret void entry: @@ -282,95 +282,87 @@ define void @delete_parallel_2() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_2() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]] -; CHECK-NEXT: store i32 0, i32* [[A]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR0]] +; CHECK-NEXT: store i32 0, ptr [[A]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A]]) ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_2() { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* -; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]] -; CHECK1-NEXT: store i32 0, i32* [[A]], align 4 -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR0]] +; CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +; CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A]]) ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_2() { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* -; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]] -; CHECK2-NEXT: store i32 0, i32* [[A]], align 4 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK2-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR0]] +; CHECK2-NEXT: store i32 0, ptr [[A]], align 4 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK2-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A]]) ; CHECK2-NEXT: ret void entry: %a = alloca i32, align 4 - %tmp = bitcast i32* %a to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) - store i32 0, i32* %a, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nonnull %a) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nonnull %a) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nonnull %a) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nonnull %a) - %tmp1 = bitcast i32* %a to i8* - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %a) + store i32 0, ptr %a, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 1, ptr @.omp_outlined..3, ptr nonnull %a) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 1, ptr @.omp_outlined..4, ptr nonnull %a) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 1, ptr @.omp_outlined..5, ptr nonnull %a) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 1, ptr @.omp_outlined..6, ptr nonnull %a) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %a) ret void } -define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { +define internal void @.omp_outlined..3(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9:[0-9]+]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[A]], align 4 +; CHECK-NEXT: store i32 [[INC]], ptr [[A]], align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { +; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]] ; CHECK1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; CHECK1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK1: if.then: -; CHECK1-NEXT: [[TMP:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: [[TMP:%.*]] = load i32, ptr [[A]], align 4 ; CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP]], 1 -; CHECK1-NEXT: store i32 [[INC]], i32* [[A]], align 4 +; CHECK1-NEXT: store i32 [[INC]], ptr [[A]], align 4 ; CHECK1-NEXT: br label [[IF_END]] ; CHECK1: if.end: ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]] ; CHECK2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; CHECK2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK2: if.then: -; CHECK2-NEXT: [[TMP:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP]], 1 -; CHECK2-NEXT: store i32 [[INC]], i32* [[A]], align 4 +; CHECK2-NEXT: store i32 [[INC]], ptr [[A]], align 4 ; CHECK2-NEXT: br label [[IF_END]] ; CHECK2: if.end: ; CHECK2-NEXT: ret void @@ -380,357 +372,333 @@ entry: br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry - %tmp = load i32, i32* %a, align 4 + %tmp = load i32, ptr %a, align 4 %inc = add nsw i32 %tmp, 1 - store i32 %inc, i32* %a, align 4 + store i32 %inc, ptr %a, align 4 br label %if.end if.end: ; preds = %if.then, %entry ret void } -define internal void @.omp_outlined..4(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { +define internal void @.omp_outlined..4(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK: omp_if.then: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: store i32 [[INC]], ptr [[A]], align 4 +; CHECK-NEXT: call void @__kmpc_end_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: br label [[OMP_IF_END]] ; CHECK: omp_if.end: ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK1-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK1-NEXT: entry: -; CHECK1-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK1-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK1: omp_if.then: -; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 ; CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK1-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK1-NEXT: store i32 [[INC]], ptr [[A]], align 4 +; CHECK1-NEXT: call void @__kmpc_end_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK1-NEXT: br label [[OMP_IF_END]] ; CHECK1: omp_if.end: ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK2-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK2-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK2: omp_if.then: -; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK2-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK2-NEXT: store i32 [[INC]], ptr [[A]], align 4 +; CHECK2-NEXT: call void @__kmpc_end_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK2-NEXT: br label [[OMP_IF_END]] ; CHECK2: omp_if.end: ; CHECK2-NEXT: ret void entry: - %tmp = load i32, i32* %.global_tid., align 4 - %tmp1 = call i32 @__kmpc_master(%struct.ident_t* nonnull @0, i32 %tmp) + %tmp = load i32, ptr %.global_tid., align 4 + %tmp1 = call i32 @__kmpc_master(ptr nonnull @0, i32 %tmp) %tmp2 = icmp eq i32 %tmp1, 0 br i1 %tmp2, label %omp_if.end, label %omp_if.then omp_if.then: ; preds = %entry - %tmp3 = load i32, i32* %a, align 4 + %tmp3 = load i32, ptr %a, align 4 %inc = add nsw i32 %tmp3, 1 - store i32 %inc, i32* %a, align 4 - call void @__kmpc_end_master(%struct.ident_t* nonnull @0, i32 %tmp) + store i32 %inc, ptr %a, align 4 + call void @__kmpc_end_master(ptr nonnull @0, i32 %tmp) br label %omp_if.end omp_if.end: ; preds = %entry, %omp_if.then ret void } -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) declare i32 @omp_get_thread_num() inaccessiblememonly nofree nosync nounwind readonly -declare i32 @__kmpc_master(%struct.ident_t*, i32) +declare i32 @__kmpc_master(ptr, i32) -declare void @__kmpc_end_master(%struct.ident_t*, i32) +declare void @__kmpc_end_master(ptr, i32) -define internal void @.omp_outlined..5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { +define internal void @.omp_outlined..5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR9]] -; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @[[GLOB0]]) #[[ATTR9]] +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK: omp_if.then: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: store i32 [[INC]], ptr [[A]], align 4 +; CHECK-NEXT: call void @__kmpc_end_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: br label [[OMP_IF_END]] ; CHECK: omp_if.end: -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK1-NEXT: entry: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]] -; CHECK1-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @[[GLOB0]]) #[[ATTR14]] +; CHECK1-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK1: omp_if.then: -; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 ; CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK1-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK1-NEXT: store i32 [[INC]], ptr [[A]], align 4 +; CHECK1-NEXT: call void @__kmpc_end_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK1-NEXT: br label [[OMP_IF_END]] ; CHECK1: omp_if.end: -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_barrier(ptr noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]] -; CHECK2-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @[[GLOB0]]) #[[ATTR14]] +; CHECK2-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK2: omp_if.then: -; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK2-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK2-NEXT: store i32 [[INC]], ptr [[A]], align 4 +; CHECK2-NEXT: call void @__kmpc_end_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK2-NEXT: br label [[OMP_IF_END]] ; CHECK2: omp_if.end: -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: ret void entry: - %omp_global_thread_num = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) - %tmp = load i32, i32* %.global_tid., align 4 - %tmp1 = call i32 @__kmpc_single(%struct.ident_t* nonnull @0, i32 %tmp) + %omp_global_thread_num = call i32 @__kmpc_global_thread_num(ptr nonnull @0) + %tmp = load i32, ptr %.global_tid., align 4 + %tmp1 = call i32 @__kmpc_single(ptr nonnull @0, i32 %tmp) %tmp2 = icmp eq i32 %tmp1, 0 br i1 %tmp2, label %omp_if.end, label %omp_if.then omp_if.then: ; preds = %entry - %tmp3 = load i32, i32* %a, align 4 + %tmp3 = load i32, ptr %a, align 4 %inc = add nsw i32 %tmp3, 1 - store i32 %inc, i32* %a, align 4 - call void @__kmpc_end_single(%struct.ident_t* nonnull @0, i32 %tmp) + store i32 %inc, ptr %a, align 4 + call void @__kmpc_end_single(ptr nonnull @0, i32 %tmp) br label %omp_if.end omp_if.end: ; preds = %entry, %omp_if.then - call void @__kmpc_barrier(%struct.ident_t* nonnull @1, i32 %omp_global_thread_num) #6 + call void @__kmpc_barrier(ptr nonnull @1, i32 %omp_global_thread_num) #6 ret void } -define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { +define internal void @.omp_outlined..6(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR14:[0-9]+]] -; CHECK-NEXT: store i32 1, i32* [[A1]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32** -; CHECK-NEXT: store i32* [[A1]], i32** [[TMP1]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 [[A1]]) #[[ATTR14:[0-9]+]] +; CHECK-NEXT: store i32 1, ptr [[A1]], align 4 +; CHECK-NEXT: store ptr [[A1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(ptr noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, ptr noundef nonnull align 8 [[DOTOMP_REDUCTION_RED_LIST]], ptr noundef nonnull @.omp.reduction.reduction_func, ptr noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK-NEXT: switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ ; CHECK-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] ; CHECK-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] ; CHECK-NEXT: ] ; CHECK: .omp.reduction.case1: -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A1]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4 -; CHECK-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +; CHECK-NEXT: call void @__kmpc_end_reduce_nowait(ptr noundef nonnull @[[GLOB2]], i32 [[TMP2]], ptr noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] ; CHECK: .omp.reduction.case2: -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[A1]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[A1]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP7]] monotonic, align 4 ; CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] ; CHECK: .omp.reduction.default: -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A1]]) ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK1-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -; CHECK1-NEXT: [[TMP:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]] -; CHECK1-NEXT: store i32 1, i32* [[A1]], align 4 -; CHECK1-NEXT: [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32** -; CHECK1-NEXT: store i32* [[A1]], i32** [[TMP1]], align 8 -; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK1-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +; CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 [[A1]]) #[[ATTR0]] +; CHECK1-NEXT: store i32 1, ptr [[A1]], align 4 +; CHECK1-NEXT: store ptr [[A1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], align 8 +; CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(ptr noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, ptr noundef nonnull align 8 [[DOTOMP_REDUCTION_RED_LIST]], ptr noundef nonnull @.omp.reduction.reduction_func, ptr noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK1-NEXT: switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ ; CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] ; CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] ; CHECK1-NEXT: ] ; CHECK1: .omp.reduction.case1: -; CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 -; CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A1]], align 4 +; CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +; CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4 ; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] -; CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 -; CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +; CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr noundef nonnull @[[GLOB2]], i32 [[TMP2]], ptr noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] ; CHECK1: .omp.reduction.case2: -; CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[A1]], align 4 -; CHECK1-NEXT: [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4 +; CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[A1]], align 4 +; CHECK1-NEXT: [[TMP8:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP7]] monotonic, align 4 ; CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] ; CHECK1: .omp.reduction.default: -; CHECK1-NEXT: [[TMP9:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]]) +; CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A1]]) ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK2-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -; CHECK2-NEXT: [[TMP:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]] -; CHECK2-NEXT: store i32 1, i32* [[A1]], align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32** -; CHECK2-NEXT: store i32* [[A1]], i32** [[TMP1]], align 8 -; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK2-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +; CHECK2-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 [[A1]]) #[[ATTR0]] +; CHECK2-NEXT: store i32 1, ptr [[A1]], align 4 +; CHECK2-NEXT: store ptr [[A1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], align 8 +; CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(ptr noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, ptr noundef nonnull align 8 [[DOTOMP_REDUCTION_RED_LIST]], ptr noundef nonnull @.omp.reduction.reduction_func, ptr noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK2-NEXT: switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ ; CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] ; CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] ; CHECK2-NEXT: ] ; CHECK2: .omp.reduction.case1: -; CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 -; CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A1]], align 4 +; CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +; CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4 ; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] -; CHECK2-NEXT: store i32 [[ADD]], i32* [[A]], align 4 -; CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +; CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr noundef nonnull @[[GLOB2]], i32 [[TMP2]], ptr noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] ; CHECK2: .omp.reduction.case2: -; CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[A1]], align 4 -; CHECK2-NEXT: [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4 +; CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[A1]], align 4 +; CHECK2-NEXT: [[TMP8:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP7]] monotonic, align 4 ; CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] ; CHECK2: .omp.reduction.default: -; CHECK2-NEXT: [[TMP9:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]]) +; CHECK2-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A1]]) ; CHECK2-NEXT: ret void entry: %a1 = alloca i32, align 4 - %.omp.reduction.red_list = alloca [1 x i8*], align 8 - %tmp = bitcast i32* %a1 to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp) - store i32 1, i32* %a1, align 4 - %tmp1 = bitcast [1 x i8*]* %.omp.reduction.red_list to i32** - store i32* %a1, i32** %tmp1, align 8 - %tmp2 = load i32, i32* %.global_tid., align 4 - %tmp3 = bitcast [1 x i8*]* %.omp.reduction.red_list to i8* - %tmp4 = call i32 @__kmpc_reduce_nowait(%struct.ident_t* nonnull @2, i32 %tmp2, i32 1, i64 8, i8* nonnull %tmp3, void (i8*, i8*)* nonnull @.omp.reduction.reduction_func, [8 x i32]* nonnull @.gomp_critical_user_.reduction.var) + %.omp.reduction.red_list = alloca [1 x ptr], align 8 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %a1) + store i32 1, ptr %a1, align 4 + store ptr %a1, ptr %.omp.reduction.red_list, align 8 + %tmp2 = load i32, ptr %.global_tid., align 4 + %tmp4 = call i32 @__kmpc_reduce_nowait(ptr nonnull @2, i32 %tmp2, i32 1, i64 8, ptr nonnull %.omp.reduction.red_list, ptr nonnull @.omp.reduction.reduction_func, ptr nonnull @.gomp_critical_user_.reduction.var) switch i32 %tmp4, label %.omp.reduction.default [ i32 1, label %.omp.reduction.case1 i32 2, label %.omp.reduction.case2 ] .omp.reduction.case1: ; preds = %entry - %tmp5 = load i32, i32* %a, align 4 - %tmp6 = load i32, i32* %a1, align 4 + %tmp5 = load i32, ptr %a, align 4 + %tmp6 = load i32, ptr %a1, align 4 %add = add nsw i32 %tmp5, %tmp6 - store i32 %add, i32* %a, align 4 - call void @__kmpc_end_reduce_nowait(%struct.ident_t* nonnull @2, i32 %tmp2, [8 x i32]* nonnull @.gomp_critical_user_.reduction.var) + store i32 %add, ptr %a, align 4 + call void @__kmpc_end_reduce_nowait(ptr nonnull @2, i32 %tmp2, ptr nonnull @.gomp_critical_user_.reduction.var) br label %.omp.reduction.default .omp.reduction.case2: ; preds = %entry - %tmp7 = load i32, i32* %a1, align 4 - %tmp8 = atomicrmw add i32* %a, i32 %tmp7 monotonic + %tmp7 = load i32, ptr %a1, align 4 + %tmp8 = atomicrmw add ptr %a, i32 %tmp7 monotonic br label %.omp.reduction.default .omp.reduction.default: ; preds = %.omp.reduction.case2, %.omp.reduction.case1, %entry - %tmp9 = bitcast i32* %a1 to i8* - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp9) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %a1) ret void } -define internal void @.omp.reduction.reduction_func(i8* %arg, i8* %arg1) { +define internal void @.omp.reduction.reduction_func(ptr %arg, ptr %arg1) { ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func -; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { +; CHECK-SAME: (ptr nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], ptr nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** -; CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32** -; CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func -; CHECK1-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { +; CHECK1-SAME: (ptr nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], ptr nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { ; CHECK1-NEXT: entry: -; CHECK1-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** -; CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -; CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32** -; CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 -; CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -; CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8 +; CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8 +; CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] -; CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +; CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 ; CHECK1-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func -; CHECK2-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { +; CHECK2-SAME: (ptr nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], ptr nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** -; CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -; CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32** -; CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 -; CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -; CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8 +; CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8 +; CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] -; CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +; CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 ; CHECK2-NEXT: ret void entry: - %tmp = bitcast i8* %arg1 to i32** - %tmp2 = load i32*, i32** %tmp, align 8 - %tmp3 = bitcast i8* %arg to i32** - %tmp4 = load i32*, i32** %tmp3, align 8 - %tmp5 = load i32, i32* %tmp4, align 4 - %tmp6 = load i32, i32* %tmp2, align 4 + %tmp2 = load ptr, ptr %arg1, align 8 + %tmp4 = load ptr, ptr %arg, align 8 + %tmp5 = load i32, ptr %tmp4, align 4 + %tmp6 = load i32, ptr %tmp2, align 4 %add = add nsw i32 %tmp5, %tmp6 - store i32 %add, i32* %tmp4, align 4 + store i32 %add, ptr %tmp4, align 4 ret void } -declare i32 @__kmpc_single(%struct.ident_t*, i32) +declare i32 @__kmpc_single(ptr, i32) -declare void @__kmpc_end_single(%struct.ident_t*, i32) +declare void @__kmpc_end_single(ptr, i32) -declare void @__kmpc_barrier(%struct.ident_t*, i32) +declare void @__kmpc_barrier(ptr, i32) -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) nofree nosync nounwind readonly +declare i32 @__kmpc_global_thread_num(ptr) nofree nosync nounwind readonly -declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +declare i32 @__kmpc_reduce_nowait(ptr, i32, i32, i64, ptr, ptr, ptr) -declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) +declare void @__kmpc_end_reduce_nowait(ptr, i32, ptr) -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) -declare !callback !2 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +declare !callback !2 void @__kmpc_fork_call(ptr, i32, ptr, ...) declare void @unknown() diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll index 54f27637d9ecda..bb764c2767f403 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll @@ -4,10 +4,10 @@ source_filename = "parallel_deletion_remarks.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8 ; void delete_parallel(void) { ; #pragma omp parallel @@ -26,14 +26,14 @@ target triple = "x86_64-pc-linux-gnu" ; CHECK: remark: parallel_deletion_remarks.c:12:1: Removing parallel region with no side-effects. ; CHECK: remark: parallel_deletion_remarks.c:14:1: Removing parallel region with no side-effects. define dso_local void @delete_parallel() local_unnamed_addr !dbg !15 { - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)), !dbg !18 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)), !dbg !19 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)), !dbg !20 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)), !dbg !21 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.), !dbg !18 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..2), !dbg !19 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..4), !dbg !20 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..6), !dbg !21 ret void, !dbg !22 } -declare !callback !23 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr +declare !callback !23 void @__kmpc_fork_call(ptr, i32, ptr, ...) local_unnamed_addr ; Function Attrs: willreturn declare !dbg !4 void @unknown_willreturn(...) #0 @@ -44,22 +44,22 @@ declare !dbg !7 void @readonly_willreturn(...) #1 ; Function Attrs: readnone willreturn declare !dbg !8 void @readnone_willreturn(...) #2 -define internal void @.omp_outlined.(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !25 { +define internal void @.omp_outlined.(ptr noalias nocapture readnone %0, ptr noalias nocapture readnone %1) !dbg !25 { call void (...) @unknown_willreturn(), !dbg !36 ret void, !dbg !36 } -define internal void @.omp_outlined..2(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !37 { +define internal void @.omp_outlined..2(ptr noalias nocapture readnone %0, ptr noalias nocapture readnone %1) !dbg !37 { call void (...) @readonly_willreturn(), !dbg !41 ret void, !dbg !41 } -define internal void @.omp_outlined..4(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !42 { +define internal void @.omp_outlined..4(ptr noalias nocapture readnone %0, ptr noalias nocapture readnone %1) !dbg !42 { call void (...) @readnone_willreturn(), !dbg !46 ret void, !dbg !46 } -define internal void @.omp_outlined..6(i32* noalias nocapture %0, i32* noalias nocapture %1) !dbg !47 { +define internal void @.omp_outlined..6(ptr noalias nocapture %0, ptr noalias nocapture %1) !dbg !47 { ret void, !dbg !51 } diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll index 898356842b189f..654f7b486283a3 100644 --- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll +++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll @@ -2,74 +2,74 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @no_spmd_exec_mode = weak constant i8 1 @spmd_exec_mode = weak constant i8 0 @parallel_exec_mode = weak constant i8 0 @G = external global i8 -@llvm.compiler.used = appending global [3 x i8*] [i8* @no_spmd_exec_mode, i8* @spmd_exec_mode, i8* @parallel_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata" ;. ; CHECK: @[[NO_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK: @[[PARALLEL_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8 -; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x i8*] [i8* @no_spmd_exec_mode, i8* @spmd_exec_mode, i8* @parallel_exec_mode], section "llvm.metadata" +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata" ;. define weak void @none_spmd() { ; CHECK-LABEL: define {{[^@]+}}@none_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) ; CHECK-NEXT: call void @none_spmd_helper() ; CHECK-NEXT: call void @mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) call void @none_spmd_helper() call void @mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } define weak void @spmd() { ; CHECK-LABEL: define {{[^@]+}}@spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: call void @spmd_helper() ; CHECK-NEXT: call void @mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) call void @spmd_helper() call void @mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) + call void @__kmpc_target_deinit(ptr null, i8 2) ret void } define weak void @parallel() { ; CHECK-LABEL: define {{[^@]+}}@parallel() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) ; CHECK-NEXT: call void @spmd_helper() -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) +; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) + %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) call void @spmd_helper() - call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) + call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0) + call void @__kmpc_target_deinit(ptr null, i8 2) ret void } define internal void @mixed_helper() { ; CHECK-LABEL: define {{[^@]+}}@mixed_helper() { ; CHECK-NEXT: [[LEVEL:%.*]] = call i8 @__kmpc_parallel_level() -; CHECK-NEXT: store i8 [[LEVEL]], i8* @G, align 1 +; CHECK-NEXT: store i8 [[LEVEL]], ptr @G, align 1 ; CHECK-NEXT: ret void ; %level = call i8 @__kmpc_parallel_level() - store i8 %level, i8* @G + store i8 %level, ptr @G ret void } @@ -98,17 +98,17 @@ f: define internal void @spmd_helper() { ; CHECK-LABEL: define {{[^@]+}}@spmd_helper() { -; CHECK-NEXT: store i8 1, i8* @G, align 1 +; CHECK-NEXT: store i8 1, ptr @G, align 1 ; CHECK-NEXT: ret void ; %level = call i8 @__kmpc_parallel_level() - store i8 %level, i8* @G + store i8 %level, ptr @G ret void } -define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) { +define internal void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_parallel_51 -; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], i8* [[TMP5:%.*]], i8* [[TMP6:%.*]], i8** [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]], ptr [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: call void @parallel_helper() ; CHECK-NEXT: ret void ; @@ -119,34 +119,34 @@ define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i define internal void @parallel_helper() { ; CHECK-LABEL: define {{[^@]+}}@parallel_helper() { ; CHECK-NEXT: [[LEVEL:%.*]] = call i8 @__kmpc_parallel_level() -; CHECK-NEXT: store i8 [[LEVEL]], i8* @G, align 1 +; CHECK-NEXT: store i8 [[LEVEL]], ptr @G, align 1 ; CHECK-NEXT: ret void ; %level = call i8 @__kmpc_parallel_level() - store i8 %level, i8* @G + store i8 %level, ptr @G ret void } declare void @foo() declare void @bar() declare i8 @__kmpc_parallel_level() -declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext) #1 -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext) #1 +declare i32 @__kmpc_target_init(ptr, i8 zeroext, i1 zeroext) #1 +declare void @__kmpc_target_deinit(ptr nocapture readnone, i8 zeroext) #1 !llvm.module.flags = !{!0, !1} !nvvm.annotations = !{!2, !3, !4} !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{void ()* @none_spmd, !"kernel", i32 1} -!3 = !{void ()* @spmd, !"kernel", i32 1} -!4 = !{void ()* @parallel, !"kernel", i32 1} +!2 = !{ptr @none_spmd, !"kernel", i32 1} +!3 = !{ptr @spmd, !"kernel", i32 1} +!4 = !{ptr @parallel, !"kernel", i32 1} ;. ; CHECK: attributes #[[ATTR0]] = { alwaysinline } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{void ()* @none_spmd, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{void ()* @spmd, !"kernel", i32 1} -; CHECK: [[META4:![0-9]+]] = !{void ()* @parallel, !"kernel", i32 1} +; CHECK: [[META2:![0-9]+]] = !{ptr @none_spmd, !"kernel", i32 1} +; CHECK: [[META3:![0-9]+]] = !{ptr @spmd, !"kernel", i32 1} +; CHECK: [[META4:![0-9]+]] = !{ptr @parallel, !"kernel", i32 1} ;. diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll index f25003218eb481..1ab939b547bb17 100644 --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -57,7 +57,7 @@ ; } ; use(a); ; } -; void merge_seq_float(float f, float *p) { +; void merge_seq_float(float f, ptr p) { ; #pragma omp parallel ; { ; use(f); @@ -228,34 +228,34 @@ ; } target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 define dso_local void @merge(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined., ptr nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..1, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined.(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined.(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } declare dso_local void @use(i32) local_unnamed_addr -declare !callback !1 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr +declare !callback !1 void @__kmpc_fork_call(ptr, i32, ptr, ...) local_unnamed_addr -define internal void @.omp_outlined..1(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..1(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -263,28 +263,28 @@ entry: define dso_local void @unmergable_proc_bind(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - store i32 %a, i32* %a.addr, align 4 - call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1) + store i32 %a, ptr %a.addr, align 4 + call void @__kmpc_push_proc_bind(ptr nonnull @1, i32 %0, i32 3) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..2, ptr nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..3, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined..2(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..2(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr +declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr -declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) local_unnamed_addr +declare void @__kmpc_push_proc_bind(ptr, i32, i32) local_unnamed_addr -define internal void @.omp_outlined..3(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..3(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -292,26 +292,26 @@ entry: define dso_local void @unmergable_num_threads(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - store i32 %a, i32* %a.addr, align 4 - call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %a) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1) + store i32 %a, ptr %a.addr, align 4 + call void @__kmpc_push_num_threads(ptr nonnull @1, i32 %0, i32 %a) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..4, ptr nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..5, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined..4(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..4(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) local_unnamed_addr +declare void @__kmpc_push_num_threads(ptr, i32, i32) local_unnamed_addr -define internal void @.omp_outlined..5(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..5(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -319,25 +319,25 @@ entry: define dso_local void @unmergable_seq_call(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..6, ptr nonnull %a.addr) call void (...) @foo() - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..7, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined..6(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..6(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } declare dso_local void @foo(...) local_unnamed_addr -define internal void @.omp_outlined..7(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..7(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -345,54 +345,54 @@ entry: define dso_local void @merge_seq(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - %0 = load i32, i32* %a.addr, align 4 + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..8, ptr nonnull %a.addr) + %0 = load i32, ptr %a.addr, align 4 %add = add nsw i32 %0, 1 - store i32 %add, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - %1 = load i32, i32* %a.addr, align 4 + store i32 %add, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..9, ptr nonnull %a.addr) + %1 = load i32, ptr %a.addr, align 4 call void @use(i32 %1) ret void } -define internal void @.omp_outlined..8(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..8(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..9(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..9(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define dso_local void @merge_seq_float(float %f, float* nocapture %p) local_unnamed_addr { +define dso_local void @merge_seq_float(float %f, ptr nocapture %p) local_unnamed_addr { entry: %f.addr = alloca float, align 4 - store float %f, float* %f.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), float* nonnull %f.addr) - %0 = load float, float* %f.addr, align 4 + store float %f, ptr %f.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..10, ptr nonnull %f.addr) + %0 = load float, ptr %f.addr, align 4 %add = fadd float %0, 0x40091EB860000000 - store float %add, float* %p, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), float* nonnull %f.addr) + store float %add, ptr %p, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..11, ptr nonnull %f.addr) ret void } -define internal void @.omp_outlined..10(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., float* nocapture nonnull readonly align 4 dereferenceable(4) %f) { +define internal void @.omp_outlined..10(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %f) { entry: - %0 = load float, float* %f, align 4 + %0 = load float, ptr %f, align 4 %conv = fptosi float %0 to i32 call void @use(i32 %conv) ret void } -define internal void @.omp_outlined..11(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., float* nocapture nonnull readonly align 4 dereferenceable(4) %f) { +define internal void @.omp_outlined..11(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %f) { entry: - %0 = load float, float* %f, align 4 + %0 = load float, ptr %f, align 4 %conv = fptosi float %0 to i32 call void @use(i32 %conv) ret void @@ -401,26 +401,26 @@ entry: define dso_local void @merge_seq_firstprivate(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - %0 = load i32, i32* %a.addr, align 4 + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..12, ptr nonnull %a.addr) + %0 = load i32, ptr %a.addr, align 4 %add = add nsw i32 %0, 1 - store i32 %add, i32* %a.addr, align 4 + store i32 %add, ptr %a.addr, align 4 %a.casted.sroa.0.0.insert.ext = zext i32 %add to i64 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 %a.casted.sroa.0.0.insert.ext) - %1 = load i32, i32* %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..13, i64 %a.casted.sroa.0.0.insert.ext) + %1 = load i32, ptr %a.addr, align 4 call void @use(i32 %1) ret void } -define internal void @.omp_outlined..12(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..12(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..13(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i64 %a) { +define internal void @.omp_outlined..13(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., i64 %a) { entry: %a.addr.sroa.0.0.extract.trunc = trunc i64 %a to i32 call void @use(i32 %a.addr.sroa.0.0.extract.trunc) @@ -431,32 +431,31 @@ define dso_local void @merge_seq_sink_lt(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 %b = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - %0 = bitcast i32* %b to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) - %1 = ptrtoint i32* %b to i64 - %2 = trunc i64 %1 to i32 - store i32 %2, i32* %b, align 4 - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..14, ptr nonnull %a.addr) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %b) + %0 = ptrtoint ptr %b to i64 + %1 = trunc i64 %0 to i32 + store i32 %1, ptr %b, align 4 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %b) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..15, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined..14(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..14(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) -define internal void @.omp_outlined..15(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..15(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -465,30 +464,29 @@ define dso_local void @merge_seq_par_use(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 %b = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - %0 = bitcast i32* %b to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) - %1 = load i32, i32* %a.addr, align 4 - %add = add nsw i32 %1, 1 - store i32 %add, i32* %b, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* nonnull %a.addr, i32* nonnull %b) - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..16, ptr nonnull %a.addr) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %b) + %0 = load i32, ptr %a.addr, align 4 + %add = add nsw i32 %0, 1 + store i32 %add, ptr %b, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 2, ptr @.omp_outlined..17, ptr nonnull %a.addr, ptr nonnull %b) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %b) ret void } -define internal void @.omp_outlined..16(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..16(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..17(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a, i32* nocapture nonnull readonly align 4 dereferenceable(4) %b) { +define internal void @.omp_outlined..17(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a, ptr nocapture nonnull readonly align 4 dereferenceable(4) %b) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) - %1 = load i32, i32* %b, align 4 + %1 = load i32, ptr %b, align 4 call void @use(i32 %1) ret void } @@ -497,39 +495,39 @@ define dso_local void @merge_cancellable_regions(i32 %cancel1, i32 %cancel2) loc entry: %cancel1.addr = alloca i32, align 4 %cancel2.addr = alloca i32, align 4 - store i32 %cancel1, i32* %cancel1.addr, align 4 - store i32 %cancel2, i32* %cancel2.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* nonnull %cancel1.addr) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32* nonnull %cancel2.addr) + store i32 %cancel1, ptr %cancel1.addr, align 4 + store i32 %cancel2, ptr %cancel2.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..18, ptr nonnull %cancel1.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..19, ptr nonnull %cancel2.addr) ret void } -define internal void @.omp_outlined..18(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel1) { +define internal void @.omp_outlined..18(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel1) { entry: - %0 = load i32, i32* %cancel1, align 4 + %0 = load i32, ptr %cancel1, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %if.end, label %if.then if.then: ; preds = %entry - %1 = load i32, i32* %.global_tid., align 4 - %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + %1 = load i32, ptr %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1) ret void if.end: ; preds = %entry ret void } -declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) local_unnamed_addr +declare i32 @__kmpc_cancel(ptr, i32, i32) local_unnamed_addr -define internal void @.omp_outlined..19(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel2) { +define internal void @.omp_outlined..19(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel2) { entry: - %0 = load i32, i32* %cancel2, align 4 + %0 = load i32, ptr %cancel2, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %if.end, label %if.then if.then: ; preds = %entry - %1 = load i32, i32* %.global_tid., align 4 - %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + %1 = load i32, ptr %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1) ret void if.end: ; preds = %entry @@ -540,41 +538,41 @@ define dso_local void @merge_cancellable_regions_seq(i32 %cancel1, i32 %cancel2) entry: %cancel1.addr = alloca i32, align 4 %cancel2.addr = alloca i32, align 4 - store i32 %cancel1, i32* %cancel1.addr, align 4 - store i32 %cancel2, i32* %cancel2.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i32* nonnull %cancel1.addr) - %0 = load i32, i32* %cancel1.addr, align 4 + store i32 %cancel1, ptr %cancel1.addr, align 4 + store i32 %cancel2, ptr %cancel2.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..20, ptr nonnull %cancel1.addr) + %0 = load i32, ptr %cancel1.addr, align 4 %tobool.not = icmp eq i32 %0, 0 %lnot.ext = zext i1 %tobool.not to i32 - store i32 %lnot.ext, i32* %cancel2.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..21 to void (i32*, i32*, ...)*), i32* nonnull %cancel2.addr) + store i32 %lnot.ext, ptr %cancel2.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..21, ptr nonnull %cancel2.addr) ret void } -define internal void @.omp_outlined..20(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel1) { +define internal void @.omp_outlined..20(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel1) { entry: - %0 = load i32, i32* %cancel1, align 4 + %0 = load i32, ptr %cancel1, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %if.end, label %if.then if.then: ; preds = %entry - %1 = load i32, i32* %.global_tid., align 4 - %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + %1 = load i32, ptr %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1) ret void if.end: ; preds = %entry ret void } -define internal void @.omp_outlined..21(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel2) { +define internal void @.omp_outlined..21(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel2) { entry: - %0 = load i32, i32* %cancel2, align 4 + %0 = load i32, ptr %cancel2, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %if.end, label %if.then if.then: ; preds = %entry - %1 = load i32, i32* %.global_tid., align 4 - %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1) + %1 = load i32, ptr %.global_tid., align 4 + %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1) ret void if.end: ; preds = %entry @@ -584,30 +582,30 @@ if.end: ; preds = %entry define dso_local void @merge_3(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..22, ptr nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..23, ptr nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..24, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined..22(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..22(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..23(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..23(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..24(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..24(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -615,35 +613,35 @@ entry: define dso_local void @merge_3_seq(i32 %a, i32 %b) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - %0 = load i32, i32* %a.addr, align 4 + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..25, ptr nonnull %a.addr) + %0 = load i32, ptr %a.addr, align 4 %add = add nsw i32 %0, 1 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - %1 = load i32, i32* %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..26, ptr nonnull %a.addr) + %1 = load i32, ptr %a.addr, align 4 %add1 = add nsw i32 %add, %1 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..27, ptr nonnull %a.addr) call void @use(i32 %add1) ret void } -define internal void @.omp_outlined..25(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..25(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..26(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..26(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..27(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..27(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -651,32 +649,32 @@ entry: define dso_local void @unmergable_3_seq_call(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..28, ptr nonnull %a.addr) call void (...) @foo() - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..29, ptr nonnull %a.addr) call void (...) @foo() - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..30, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined..28(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..28(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..29(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..29(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..30(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..30(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -684,32 +682,32 @@ entry: define dso_local void @unmergable_3_proc_bind(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1) + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..31, ptr nonnull %a.addr) + call void @__kmpc_push_proc_bind(ptr nonnull @1, i32 %0, i32 3) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..32, ptr nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..33, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined..31(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..31(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..32(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..32(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..33(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..33(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -717,33 +715,33 @@ entry: define dso_local void @unmergable_3_num_threads(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - %1 = load i32, i32* %a.addr, align 4 - call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %1) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1) + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..34, ptr nonnull %a.addr) + %1 = load i32, ptr %a.addr, align 4 + call void @__kmpc_push_num_threads(ptr nonnull @1, i32 %0, i32 %1) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..35, ptr nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..36, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined..34(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..34(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..35(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..35(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..36(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..36(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -751,31 +749,31 @@ entry: define dso_local void @merge_2_unmergable_1(i32 %a) local_unnamed_addr { entry: %a.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..37 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..38 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + store i32 %a, ptr %a.addr, align 4 + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..37, ptr nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..38, ptr nonnull %a.addr) call void (...) @foo() - call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nonnull %a.addr) + call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..39, ptr nonnull %a.addr) ret void } -define internal void @.omp_outlined..37(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..37(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..38(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..38(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } -define internal void @.omp_outlined..39(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a) { +define internal void @.omp_outlined..39(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a) { entry: - %0 = load i32, i32* %a, align 4 + %0 = load i32, ptr %a, align 4 call void @use(i32 %0) ret void } @@ -791,11 +789,11 @@ entry: ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -804,22 +802,22 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -828,126 +826,126 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -958,36 +956,36 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float -; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 -; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store float [[F]], ptr [[F_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -996,31 +994,31 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1031,26 +1029,26 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000 -; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK-NEXT: store float [[ADD]], ptr [[P]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void @@ -1059,47 +1057,47 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 -; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1110,25 +1108,25 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 -; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 ; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) @@ -1137,11 +1135,11 @@ entry: ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1150,32 +1148,32 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1186,28 +1184,27 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) -; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 -; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use @@ -1215,48 +1212,46 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1267,28 +1262,27 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[B]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP1]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions @@ -1296,12 +1290,12 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1310,22 +1304,22 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1334,26 +1328,26 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void @@ -1362,12 +1356,12 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1376,31 +1370,31 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1411,37 +1405,37 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 ; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 -; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void @@ -1449,11 +1443,11 @@ entry: ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1462,25 +1456,25 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) -; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1489,21 +1483,21 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq @@ -1512,58 +1506,58 @@ entry: ; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK: omp_region.end4: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split.split.split: -; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1574,143 +1568,143 @@ entry: ; CHECK: omp_region.body5: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] ; CHECK: seq.par.merged2: -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]] -; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK: omp_region.body5.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: br label [[OMP_REGION_END4]] ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1718,25 +1712,25 @@ entry: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1745,32 +1739,32 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1779,22 +1773,22 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1803,126 +1797,126 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1933,36 +1927,36 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float -; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 -; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store float [[F]], ptr [[F_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1971,31 +1965,31 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2006,26 +2000,26 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000 -; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK-NEXT: store float [[ADD]], ptr [[P]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void @@ -2034,47 +2028,47 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 -; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2085,25 +2079,25 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 -; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 ; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) @@ -2112,11 +2106,11 @@ entry: ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -2125,32 +2119,32 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2161,28 +2155,27 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) -; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 -; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use @@ -2190,48 +2183,46 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2242,28 +2233,27 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[B]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP1]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions @@ -2271,12 +2261,12 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -2285,22 +2275,22 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2309,26 +2299,26 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void @@ -2337,12 +2327,12 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -2351,31 +2341,31 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2386,37 +2376,37 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 ; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 -; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void @@ -2424,11 +2414,11 @@ entry: ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -2437,25 +2427,25 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) -; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2464,21 +2454,21 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq @@ -2487,58 +2477,58 @@ entry: ; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK: omp_region.end4: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split.split.split: -; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2549,143 +2539,143 @@ entry: ; CHECK: omp_region.body5: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] ; CHECK: seq.par.merged2: -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]] -; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK: omp_region.body5.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: br label [[OMP_REGION_END4]] ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -2693,25 +2683,25 @@ entry: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2720,32 +2710,32 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -2754,22 +2744,22 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2778,126 +2768,126 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2908,36 +2898,36 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float -; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 -; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store float [[F]], ptr [[F_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -2946,31 +2936,31 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -2981,26 +2971,26 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000 -; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK-NEXT: store float [[ADD]], ptr [[P]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void @@ -3009,47 +2999,47 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 -; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3060,25 +3050,25 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 -; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 ; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) @@ -3087,11 +3077,11 @@ entry: ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -3100,32 +3090,32 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3136,28 +3126,27 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) -; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 -; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use @@ -3165,48 +3154,46 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3217,28 +3204,27 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[B]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP1]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions @@ -3246,12 +3232,12 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -3260,22 +3246,22 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3284,26 +3270,26 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void @@ -3312,12 +3298,12 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -3326,31 +3312,31 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3361,37 +3347,37 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 ; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 -; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void @@ -3399,11 +3385,11 @@ entry: ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -3412,25 +3398,25 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) -; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3439,21 +3425,21 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq @@ -3462,58 +3448,58 @@ entry: ; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK: omp_region.end4: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split.split.split: -; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3524,143 +3510,143 @@ entry: ; CHECK: omp_region.body5: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] ; CHECK: seq.par.merged2: -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]] -; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK: omp_region.body5.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: br label [[OMP_REGION_END4]] ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -3668,25 +3654,25 @@ entry: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3695,32 +3681,32 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -3729,22 +3715,22 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3753,126 +3739,126 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3883,36 +3869,36 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float -; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 -; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store float [[F]], ptr [[F_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -3921,31 +3907,31 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -3956,26 +3942,26 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000 -; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK-NEXT: store float [[ADD]], ptr [[P]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void @@ -3984,47 +3970,47 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 -; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4035,25 +4021,25 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 -; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 ; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) @@ -4062,11 +4048,11 @@ entry: ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4075,32 +4061,32 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4111,28 +4097,27 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) -; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 -; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use @@ -4140,48 +4125,46 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4192,28 +4175,27 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[B]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP1]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions @@ -4221,12 +4203,12 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4235,22 +4217,22 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4259,26 +4241,26 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void @@ -4287,12 +4269,12 @@ entry: ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4301,31 +4283,31 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4336,37 +4318,37 @@ entry: ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 ; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 -; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 -; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void @@ -4374,11 +4356,11 @@ entry: ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4387,25 +4369,25 @@ entry: ; CHECK: entry.split.split: ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) -; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4414,21 +4396,21 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq @@ -4437,58 +4419,58 @@ entry: ; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK: omp.par.exit.split: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: -; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK: omp_region.end4: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split.split.split: -; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4499,143 +4481,143 @@ entry: ; CHECK: omp_region.body5: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] ; CHECK: seq.par.merged2: -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]] -; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK: omp_region.body5.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: br label [[OMP_REGION_END4]] ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK: seq.par.merged: -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 -; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4643,25 +4625,25 @@ entry: ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK: omp.par.outlined.exit.exitStub: ; CHECK-NEXT: ret void ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4670,36 +4652,36 @@ entry: ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; CHECK2-LABEL: define {{[^@]+}}@merge ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4710,22 +4692,22 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4738,17 +4720,17 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -4757,26 +4739,26 @@ entry: ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]]) -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -4785,26 +4767,26 @@ entry: ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -4813,25 +4795,25 @@ entry: ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: call void (...) @foo() -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -4839,52 +4821,52 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_seq ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK2: omp.par.exit.split: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4895,53 +4877,53 @@ entry: ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4 +; CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 ; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 -; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[ADD]], ptr [[LOADGEP_A_ADDR]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float -; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK2-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 ; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 -; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 +; CHECK2-NEXT: store float [[F]], ptr [[F_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: store float [[F]], ptr [[F_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1 -; CHECK2-NEXT: store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2 -; CHECK2-NEXT: store float* [[P]], float** [[GEP_P]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[F_RELOADED]], ptr [[GEP_F_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store ptr [[F_ADDR]], ptr [[GEP_F_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 +; CHECK2-NEXT: store ptr [[P]], ptr [[GEP_P]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_float..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4952,36 +4934,36 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2 -; CHECK2-NEXT: [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8 +; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load ptr, ptr [[GEP_F_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_F_ADDR:%.*]] = load ptr, ptr [[GEP_F_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 +; CHECK2-NEXT: [[LOADGEP_P:%.*]] = load ptr, ptr [[GEP_P]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = load float, ptr [[LOADGEP_F_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, float*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, float*)* @.omp_outlined..11 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4993,30 +4975,30 @@ entry: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: ; CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000 -; CHECK2-NEXT: store float [[ADD]], float* [[LOADGEP_P]], align 4 +; CHECK2-NEXT: store float [[ADD]], ptr [[LOADGEP_P]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK2-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK2-NEXT: call void @use(i32 [[CONV]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load float, ptr [[F]], align 4 ; CHECK2-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK2-NEXT: call void @use(i32 [[CONV]]) ; CHECK2-NEXT: ret void @@ -5025,58 +5007,58 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i64* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 1 -; CHECK2-NEXT: store i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i64* }*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), { i32*, i64* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], ptr [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_firstprivate..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK2: omp.par.exit.split: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i64* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load i64*, i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5087,31 +5069,31 @@ entry: ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4 +; CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4 ; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 -; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[ADD]], ptr [[LOADGEP_A_ADDR]], align 4 ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 -; CHECK2-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 ; CHECK2-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) @@ -5121,15 +5103,15 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5140,32 +5122,32 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5176,34 +5158,33 @@ entry: ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to i8* -; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]]) -; CHECK2-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[B]] to i64 -; CHECK2-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; CHECK2-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 -; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]]) +; CHECK2-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]]) +; CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK2-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +; CHECK2-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -5211,66 +5192,64 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 ; CHECK2-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 -; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2 -; CHECK2-NEXT: store i32* [[B]], i32** [[GEP_B]], align 8 -; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[A_RELOADED]], ptr [[GEP_A_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 +; CHECK2-NEXT: store ptr [[B]], ptr [[GEP_B]], align 8 +; CHECK2-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_par_use..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK2: omp.par.exit.split: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: -; CHECK2-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* -; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK2-NEXT: call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2 -; CHECK2-NEXT: [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8 +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 +; CHECK2-NEXT: [[LOADGEP_B:%.*]] = load ptr, ptr [[GEP_B]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_B]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_B]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5281,33 +5260,32 @@ entry: ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8* ; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_B]], align 4 +; CHECK2-NEXT: store i32 [[ADD]], ptr [[LOADGEP_B]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..17 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP1]]) ; CHECK2-NEXT: ret void ; @@ -5315,19 +5293,19 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 ; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 -; CHECK2-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[CANCEL1_ADDR]], ptr [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store ptr [[CANCEL2_ADDR]], ptr [[GEP_CANCEL2_ADDR]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_cancellable_regions..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5338,24 +5316,24 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..18 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..19 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5368,28 +5346,28 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..18 -; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK2: if.then: -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK2-NEXT: ret void ; CHECK2: if.end: ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..19 -; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK2: if.then: -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK2-NEXT: ret void ; CHECK2: if.end: ; CHECK2-NEXT: ret void @@ -5398,23 +5376,23 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 ; CHECK2-NEXT: [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 +; CHECK2-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: store i32 [[CANCEL1]], ptr [[CANCEL1_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 -; CHECK2-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2 -; CHECK2-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[CANCEL1_RELOADED]], ptr [[GEP_CANCEL1_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store ptr [[CANCEL1_ADDR]], ptr [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 +; CHECK2-NEXT: store ptr [[CANCEL2_ADDR]], ptr [[GEP_CANCEL2_ADDR]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_cancellable_regions_seq..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5425,36 +5403,36 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2 -; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load ptr, ptr [[GEP_CANCEL1_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 +; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_CANCEL1_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..20 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..21 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5467,40 +5445,40 @@ entry: ; CHECK2: seq.par.merged: ; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK2-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 -; CHECK2-NEXT: store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[LNOT_EXT]], ptr [[LOADGEP_CANCEL2_ADDR]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..20 -; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4 ; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK2: if.then: -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK2-NEXT: ret void ; CHECK2: if.end: ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..21 -; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4 ; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK2: if.then: -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK2-NEXT: ret void ; CHECK2: if.end: ; CHECK2-NEXT: ret void @@ -5509,15 +5487,15 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_3 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_3..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_3..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5528,25 +5506,25 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_3..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..22 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..23 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..24 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5559,25 +5537,25 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..22 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..23 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -5585,81 +5563,81 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq ; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr, ptr }, align 8 ; CHECK2-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 -; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2 -; CHECK2-NEXT: store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8 -; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3 -; CHECK2-NEXT: store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[A_RELOADED]], ptr [[GEP_A_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 +; CHECK2-NEXT: store ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 3 +; CHECK2-NEXT: store ptr [[ADD1_SEQ_OUTPUT_ALLOC]], ptr [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_3_seq..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] ; CHECK2: omp.par.exit.split: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: -; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK2-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8 -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2 -; CHECK2-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8 -; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3 -; CHECK2-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 +; CHECK2-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 3 +; CHECK2-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..25 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..26 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 ; CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK2: omp_region.end4: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split.split: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..27 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5670,50 +5648,50 @@ entry: ; CHECK2: omp_region.body5: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] ; CHECK2: seq.par.merged2: -; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]] -; CHECK2-NEXT: store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: store i32 [[ADD1]], ptr [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK2: omp_region.body5.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: br label [[OMP_REGION_END4]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: ; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: store i32 [[ADD]], ptr [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..25 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..26 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..27 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -5722,35 +5700,35 @@ entry: ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: call void (...) @foo() -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: call void (...) @foo() -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..28 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..29 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..30 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -5759,35 +5737,35 @@ entry: ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..31 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..32 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..33 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -5796,35 +5774,35 @@ entry: ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..34 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..35 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..36 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; @@ -5832,15 +5810,15 @@ entry: ; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { ptr }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 -; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5848,27 +5826,27 @@ entry: ; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK2: entry.split.split: ; CHECK2-NEXT: call void (...) @foo() -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: -; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 -; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..37 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..38 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void (ptr, ptr, ...) @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5881,25 +5859,25 @@ entry: ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..37 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..38 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; ; ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..39 -; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK2-NEXT: entry: -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 8d8bb5197d94eb..046f39ebed5040 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -12,57 +12,57 @@ target triple = "nvptx64" ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. ; UTC_ARGS: --enable -@S = external local_unnamed_addr global i8* +@S = external local_unnamed_addr global ptr -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. ;. -; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8* +; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr ;. -; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8* +; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr ;. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-DISABLED-NEXT: ret i32 0 ; ret i32 0 } -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) define void @kernel() { ; CHECK-LABEL: define {{[^@]+}}@kernel() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false) ; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]] ; CHECK-NEXT: call void @bar() #[[ATTR0]] ; CHECK-NEXT: call void @convert_and_move_alloca() #[[ATTR0]] ; CHECK-NEXT: call void @unknown_no_openmp() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1) ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false) ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR0:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR0]] ; CHECK-DISABLED-NEXT: call void @convert_and_move_alloca() #[[ATTR0]] ; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() -; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1) +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1) ; CHECK-DISABLED-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 true) call void @foo() call void @bar() call void @convert_and_move_alloca() call void @unknown_no_openmp() - call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1) + call void @__kmpc_target_deinit(ptr nonnull null, i8 1) ret void } @@ -80,9 +80,9 @@ define internal void @foo() { ; CHECK-DISABLED-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12 - call void @use(i8* %0) - call void @__kmpc_free_shared(i8* %0, i64 4) + %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !12 + call void @use(ptr %0) + call void @__kmpc_free_shared(ptr %0, i64 4) ret void } @@ -90,34 +90,34 @@ define internal void @bar() { ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] -; CHECK-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]] +; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar ; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] -; CHECK-DISABLED-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]] -; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] +; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]] +; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]] ; CHECK-DISABLED-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !13 - call void @share(i8* %0), !dbg !13 - call void @__kmpc_free_shared(i8* %0, i64 4) + %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !13 + call void @share(ptr %0), !dbg !13 + call void @__kmpc_free_shared(ptr %0, i64 4) ret void } -define internal void @use(i8* %x) { +define internal void @use(ptr %x) { ; CHECK-LABEL: define {{[^@]+}}@use -; CHECK-SAME: (i8* [[X:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@use -; CHECK-DISABLED-SAME: (i8* [[X:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-DISABLED-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: ret void ; @@ -125,21 +125,21 @@ entry: ret void } -define internal void @share(i8* %x) { +define internal void @share(ptr %x) { ; CHECK-LABEL: define {{[^@]+}}@share -; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: store i8* [[X]], i8** @S, align 8 +; CHECK-NEXT: store ptr [[X]], ptr @S, align 8 ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@share -; CHECK-DISABLED-SAME: (i8* nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-DISABLED-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: store i8* [[X]], i8** @S, align 8 +; CHECK-DISABLED-NEXT: store ptr [[X]], ptr @S, align 8 ; CHECK-DISABLED-NEXT: ret void ; entry: - store i8* %x, i8** @S + store ptr %x, ptr @S ret void } @@ -147,20 +147,20 @@ define void @unused() { ; CHECK-LABEL: define {{[^@]+}}@unused() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1 -; CHECK-NEXT: call void @use(i8* undef) +; CHECK-NEXT: call void @use(ptr undef) ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4), !dbg [[DBG11:![0-9]+]] -; CHECK-DISABLED-NEXT: call void @use(i8* [[TMP0]]) -; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4), !dbg [[DBG11:![0-9]+]] +; CHECK-DISABLED-NEXT: call void @use(ptr [[TMP0]]) +; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) ; CHECK-DISABLED-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !14 - call void @use(i8* %0) - call void @__kmpc_free_shared(i8* %0, i64 4) + %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !14 + call void @use(ptr %0) + call void @__kmpc_free_shared(ptr %0, i64 4) ret void } @@ -172,15 +172,15 @@ define internal void @convert_and_move_alloca() { ; CHECK-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[INITLOOP:%.*]] ; CHECK: initloop: -; CHECK-NEXT: store i32 0, i32* [[IV_PTR]], align 4 +; CHECK-NEXT: store i32 0, ptr [[IV_PTR]], align 4 ; CHECK-NEXT: br label [[LOOPBODY:%.*]] ; CHECK: loopbody: -; CHECK-NEXT: [[IV:%.*]] = load i32, i32* [[IV_PTR]], align 4 +; CHECK-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10 ; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]] ; CHECK: loopinc: ; CHECK-NEXT: [[INC:%.*]] = add i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[IV_PTR]], align 4 +; CHECK-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4 ; CHECK-NEXT: br label [[LOOPBODY]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -192,15 +192,15 @@ define internal void @convert_and_move_alloca() { ; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: br label [[INITLOOP:%.*]] ; CHECK-DISABLED: initloop: -; CHECK-DISABLED-NEXT: store i32 0, i32* [[IV_PTR]], align 4 +; CHECK-DISABLED-NEXT: store i32 0, ptr [[IV_PTR]], align 4 ; CHECK-DISABLED-NEXT: br label [[LOOPBODY:%.*]] ; CHECK-DISABLED: loopbody: -; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, i32* [[IV_PTR]], align 4 +; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10 ; CHECK-DISABLED-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]] ; CHECK-DISABLED: loopinc: ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add i32 [[IV]], 1 -; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[IV_PTR]], align 4 +; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4 ; CHECK-DISABLED-NEXT: br label [[LOOPBODY]] ; CHECK-DISABLED: exit: ; CHECK-DISABLED-NEXT: ret void @@ -208,36 +208,36 @@ define internal void @convert_and_move_alloca() { entry: %iv_ptr = alloca i32, align 4 %ub_ptr = alloca i32, align 4 - store i32 10, i32* %ub_ptr + store i32 10, ptr %ub_ptr br label %initloop initloop: - store i32 0, i32* %iv_ptr - %ub = load i32, i32* %ub_ptr + store i32 0, ptr %iv_ptr + %ub = load i32, ptr %ub_ptr br label %loopbody loopbody: - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !16 - call void @use(i8* %0) - call void @__kmpc_free_shared(i8* %0, i64 4) - %iv = load i32, i32* %iv_ptr + %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !16 + call void @use(ptr %0) + call void @__kmpc_free_shared(ptr %0, i64 4) + %iv = load i32, ptr %iv_ptr %1 = icmp eq i32 %iv, %ub br i1 %1, label %exit, label %loopinc loopinc: %inc = add i32 %iv, 1 - store i32 %inc, i32* %iv_ptr + store i32 %inc, ptr %iv_ptr br label %loopbody exit: ret void } -; CHECK: declare i8* @__kmpc_alloc_shared(i64) -declare i8* @__kmpc_alloc_shared(i64) +; CHECK: declare ptr @__kmpc_alloc_shared(i64) +declare ptr @__kmpc_alloc_shared(i64) -; CHECK: declare void @__kmpc_free_shared(i8* allocptr nocapture, i64) -declare void @__kmpc_free_shared(i8*, i64) +; CHECK: declare void @__kmpc_free_shared(ptr allocptr nocapture, i64) +declare void @__kmpc_free_shared(ptr, i64) declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" @@ -250,7 +250,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" !2 = !{} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{void ()* @kernel, !"kernel", i32 1} +!5 = !{ptr @kernel, !"kernel", i32 1} !6 = !{i32 7, !"openmp", i32 50} !7 = !{i32 7, !"openmp-device", i32 50} !8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) @@ -284,7 +284,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META7:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1} +; CHECK: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1} ; CHECK: [[DBG8]] = !DILocation(line: 4, column: 2, scope: !9) ; CHECK: [[META9:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ; CHECK: [[META10:![0-9]+]] = !DISubroutineType(types: !2) @@ -296,7 +296,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK-DISABLED: [[META7:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1} +; CHECK-DISABLED: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1} ; CHECK-DISABLED: [[DBG8]] = !DILocation(line: 4, column: 2, scope: !9) ; CHECK-DISABLED: [[META9:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ; CHECK-DISABLED: [[META10:![0-9]+]] = !DISubroutineType(types: !2) diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll index 8344f36504772b..102c1161800208 100644 --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -13,11 +13,11 @@ target triple = "nvptx64" ; CHECK-LIMIT: remark: replace_globalization.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization ; UTC_ARGS: --enable -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } -@S = external local_unnamed_addr global i8* +@S = external local_unnamed_addr global ptr @0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @foo_exec_mode = weak constant i8 1 @bar_exec_mode = weak constant i8 1 @baz_spmd_exec_mode = weak constant i8 2 @@ -25,79 +25,70 @@ target triple = "nvptx64" define dso_local void @foo() "kernel" { entry: - %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) - %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) + %c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) call void @unknown_no_openmp() - %x_on_stack = bitcast i8* %x to i32* - %0 = bitcast i32* %x_on_stack to i8* - call void @use(i8* %0) - call void @__kmpc_free_shared(i8* %x, i64 4) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + call void @use(ptr %x) + call void @__kmpc_free_shared(ptr %x, i64 4) + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void } define void @bar() "kernel" { - %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) call void @unknown_no_openmp() %cmp = icmp eq i32 %c, -1 br i1 %cmp, label %master1, label %exit master1: - %x = call align 4 i8* @__kmpc_alloc_shared(i64 16), !dbg !11 - %x_on_stack = bitcast i8* %x to [4 x i32]* - %a0 = bitcast [4 x i32]* %x_on_stack to i8* - call void @use(i8* %a0) - call void @__kmpc_free_shared(i8* %x, i64 16) + %x = call align 4 ptr @__kmpc_alloc_shared(i64 16), !dbg !11 + call void @use(ptr %x) + call void @__kmpc_free_shared(ptr %x, i64 16) br label %next next: call void @unknown_no_openmp() %b0 = icmp eq i32 %c, -1 br i1 %b0, label %master2, label %exit master2: - %y = call align 4 i8* @__kmpc_alloc_shared(i64 4), !dbg !12 - %y_on_stack = bitcast i8* %y to [4 x i32]* - %b1 = bitcast [4 x i32]* %y_on_stack to i8* - call void @use(i8* %b1) - call void @__kmpc_free_shared(i8* %y, i64 4) + %y = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !12 + call void @use(ptr %y) + call void @__kmpc_free_shared(ptr %y, i64 4) br label %exit exit: - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void } define void @baz_spmd() "kernel" { - %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true) + %c = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 true) call void @unknown_no_openmp() %c0 = icmp eq i32 %c, -1 br i1 %c0, label %master3, label %exit master3: - %z = call align 4 i8* @__kmpc_alloc_shared(i64 24), !dbg !12 - %z_on_stack = bitcast i8* %z to [6 x i32]* - %c1 = bitcast [6 x i32]* %z_on_stack to i8* - call void @use(i8* %c1) - call void @__kmpc_free_shared(i8* %z, i64 24) + %z = call align 4 ptr @__kmpc_alloc_shared(i64 24), !dbg !12 + call void @use(ptr %z) + call void @__kmpc_free_shared(ptr %z, i64 24) br label %exit exit: - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2) + call void @__kmpc_target_deinit(ptr @1, i8 2) ret void } -define void @use(i8* %x) { +define void @use(ptr %x) { entry: - store i8* %x, i8** @S + store ptr %x, ptr @S ret void } @offset =global i32 undef @stack = internal addrspace(3) global [1024 x i8] undef -define private i8* @__kmpc_alloc_shared(i64) { - %bc = bitcast [1024 x i8] addrspace(3) * @stack to i8 addrspace(3) * - %ac = addrspacecast i8 addrspace(3) * %bc to i8* - %l = load i32, i32* @offset - %gep = getelementptr i8, i8* %ac, i32 %l - ret i8* %gep +define private ptr @__kmpc_alloc_shared(i64) { + %ac = addrspacecast ptr addrspace(3) @stack to ptr + %l = load i32, ptr @offset + %gep = getelementptr i8, ptr %ac, i32 %l + ret ptr %gep } -declare void @__kmpc_free_shared(i8*, i64) +declare void @__kmpc_free_shared(ptr, i64) declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -106,11 +97,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" @@ -125,17 +116,17 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} -!7 = !{void ()* @foo, !"kernel", i32 1} -!8 = !{void ()* @bar, !"kernel", i32 1} -!13 = !{void ()* @baz_spmd, !"kernel", i32 1} +!7 = !{ptr @foo, !"kernel", i32 1} +!8 = !{ptr @bar, !"kernel", i32 1} +!13 = !{ptr @baz_spmd, !"kernel", i32 1} !9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !10 = !DISubroutineType(types: !2) !11 = !DILocation(line: 5, column: 7, scope: !9) !12 = !DILocation(line: 5, column: 14, scope: !9) ;. -; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8* +; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK: @[[FOO_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[BAR_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[BAZ_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 @@ -147,77 +138,77 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) -; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; CHECK-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @unknown_no_openmp() -; CHECK-NEXT: call void @use.internalized(i8* nofree [[X]]) #[[ATTR6]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR6]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]] ; CHECK: master1: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR6]] +; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR6]] ; CHECK-NEXT: br label [[NEXT:%.*]] ; CHECK: next: ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[B0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[B0]], label [[MASTER2:%.*]], label [[EXIT]] ; CHECK: master2: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]] +; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@baz_spmd ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 true) ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: -; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]] -; CHECK-NEXT: call void @use.internalized(i8* nofree [[Z]]) #[[ATTR6]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR6]] +; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]] +; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; ; ; CHECK: Function Attrs: nofree norecurse nounwind memory(write) ; CHECK-LABEL: define {{[^@]+}}@use.internalized -; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: store i8* [[X]], i8** @S, align 8 +; CHECK-NEXT: store ptr [[X]], ptr @S, align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@use -; CHECK-SAME: (i8* [[X:%.*]]) { +; CHECK-SAME: (ptr [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: store i8* [[X]], i8** @S, align 8 +; CHECK-NEXT: store ptr [[X]], ptr @S, align 8 ; CHECK-NEXT: ret void ; ; ; CHECK: Function Attrs: nosync nounwind allocsize(0) memory(read) ; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared ; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: [[L:%.*]] = load i32, i32* @offset, align 4 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([1024 x i8], [1024 x i8] addrspace(3)* @stack, i32 0, i32 0) to i8*), i32 [[L]] -; CHECK-NEXT: ret i8* [[GEP]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr @offset, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspacecast (ptr addrspace(3) @stack to ptr), i32 [[L]] +; CHECK-NEXT: ret ptr [[GEP]] ; ; ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ;. @@ -236,9 +227,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META7:![0-9]+]] = !{void ()* @foo, !"kernel", i32 1} -; CHECK: [[META8:![0-9]+]] = !{void ()* @bar, !"kernel", i32 1} -; CHECK: [[META9:![0-9]+]] = !{void ()* @baz_spmd, !"kernel", i32 1} +; CHECK: [[META7:![0-9]+]] = !{ptr @foo, !"kernel", i32 1} +; CHECK: [[META8:![0-9]+]] = !{ptr @bar, !"kernel", i32 1} +; CHECK: [[META9:![0-9]+]] = !{ptr @baz_spmd, !"kernel", i32 1} ; CHECK: [[DBG10]] = !DILocation(line: 5, column: 14, scope: !11) ; CHECK: [[META11:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !12, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ; CHECK: [[META12:![0-9]+]] = !DISubroutineType(types: !2) diff --git a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll index 94ae5d798be460..b3e760f5c500bb 100644 --- a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll +++ b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll @@ -4,45 +4,45 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 -@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8 +@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, ptr @.str }, align 8 define i32 @main() { entry: - call void (%struct.ident_t*, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) + call void (ptr, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, ptr @.omp_outlined.) ret i32 0 } ; Only the last runtime call will be matched due that the rest of the "runtime function" calls ; have some type mismatch compared to the real runtime function. See the check at bottom. -define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { entry: - call void @__kmpc_master(%struct.ident_t* nonnull @0) - call void @__kmpc_end_master(%struct.ident_t* nonnull @0, i32 0, i32 0) - call void @__kmpc_barrier(%struct.ident_t* nonnull @1, float 0.0) + call void @__kmpc_master(ptr nonnull @0) + call void @__kmpc_end_master(ptr nonnull @0, i32 0, i32 0) + call void @__kmpc_barrier(ptr nonnull @1, float 0.0) call void @omp_get_thread_num() - call void @__kmpc_flush(%struct.ident_t* nonnull @0) + call void @__kmpc_flush(ptr nonnull @0) ret void } ; Fewer arguments than expected in variadic function. -declare !callback !2 void @__kmpc_fork_call(%struct.ident_t*, void (i32*, i32*, ...)*, ...) +declare !callback !2 void @__kmpc_fork_call(ptr, ptr, ...) ; Fewer number of arguments in non variadic function. -declare void @__kmpc_master(%struct.ident_t*) +declare void @__kmpc_master(ptr) ; Bigger number of arguments in non variadic function. -declare void @__kmpc_end_master(%struct.ident_t*, i32, i32) +declare void @__kmpc_end_master(ptr, i32, i32) ; Different argument type than the expected. -declare void @__kmpc_barrier(%struct.ident_t*, float) +declare void @__kmpc_barrier(ptr, float) ; Proper use of runtime function. -declare void @__kmpc_flush(%struct.ident_t*) +declare void @__kmpc_flush(ptr) ; Different return type. declare void @omp_get_thread_num() diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll index 4edff601bcc252..16ccd2d726d364 100644 --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -3,10 +3,10 @@ ; REQUIRES: asserts ; ModuleID = 'single_threaded_exeuction.c' -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [1 x i8] c"\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @kernel_exec_mode = weak constant i8 1 @@ -15,7 +15,7 @@ ; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread. define void @kernel() { - %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false) + %call = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 false) %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %if.then, label %if.else if.then: @@ -23,7 +23,7 @@ if.then: if.else: br label %if.end if.end: - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) + call void @__kmpc_target_deinit(ptr null, i8 1) ret void } @@ -104,11 +104,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() declare i32 @llvm.amdgcn.workitem.id.x() -declare void @__kmpc_kernel_prepare_parallel(i8*) +declare void @__kmpc_kernel_prepare_parallel(ptr) -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) +declare i32 @__kmpc_target_init(ptr, i8, i1) -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) attributes #0 = { cold noinline } @@ -123,7 +123,7 @@ attributes #0 = { cold noinline } !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} -!7 = !{void ()* @kernel, !"kernel", i32 1} +!7 = !{ptr @kernel, !"kernel", i32 1} !8 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !9 = distinct !DISubprogram(name: "cold", scope: !1, file: !1, line: 8, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !10 = !DISubroutineType(types: !2) diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index a088755d734b69..daa77cc3fdafb9 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -86,61 +86,61 @@ ;; } ;; } -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } %struct.kmp_task_t_with_privates = type { %struct.kmp_task_t } -%struct.kmp_task_t = type { i8*, i32 (i32, i8*)*, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t } -%union.kmp_cmplrdata_t = type { i32 (i32, i8*)* } +%struct.kmp_task_t = type { ptr, ptr, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t } +%union.kmp_cmplrdata_t = type { ptr } %struct.anon = type {} @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode = weak constant i8 1 @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1 @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1 @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode = weak constant i8 1 @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode = weak constant i8 1 @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode = weak constant i8 1 -@llvm.compiler.used = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" ; Function Attrs: alwaysinline convergent norecurse nounwind ;. ; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" -; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ; AMDGPU: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. ; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" -; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ; NVPTX: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. ; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" ; AMDGPU-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU-DISABLED: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -150,14 +150,14 @@ ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. ; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" ; NVPTX-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX-DISABLED: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -197,16 +197,16 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug @@ -214,25 +214,25 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug ; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -242,19 +242,19 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.finished: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker_state_machine.is_active.check: ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*) +; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) @@ -266,7 +266,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -274,19 +274,19 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] +; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug ; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -296,18 +296,18 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.finished: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker_state_machine.is_active.check: ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*) +; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) @@ -319,7 +319,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -327,16 +327,16 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] +; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -344,20 +344,20 @@ common.ret: ; preds = %entry, %user_code.e ret void user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 0, i32* %.zero.addr, align 4 - store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 - call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 0, ptr %.zero.addr, align 4 + store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 + call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #6 + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -367,16 +367,15 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; NVPTX-NEXT: br label [[FOR_COND:%.*]] ; NVPTX: for.cond: ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -386,16 +385,15 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED: for.cond: ; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -405,16 +403,15 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED: for.cond: ; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -424,14 +421,13 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 br label %for.cond for.cond: ; preds = %for.body, %entry @@ -444,35 +440,34 @@ for.cond.cleanup: ; preds = %for.cond ret void for.body: ; preds = %for.cond - %0 = load i32, i32* %.global_tid., align 4, !tbaa !18 - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %1, i64 0) + %0 = load i32, ptr %.global_tid., align 4, !tbaa !18 + call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0) %inc = add nsw i32 %i.0, 1 br label %for.cond, !llvm.loop !22 } ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void @@ -489,9 +484,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -499,9 +494,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -509,9 +504,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -519,19 +514,19 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: ret void ; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 %1, i32* %.addr1, align 4, !tbaa !18 - store i32 0, i32* %.zero.addr, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr) #6 + %global_args = alloca ptr, align 8 + store i32 %1, ptr %.addr1, align 4, !tbaa !18 + store i32 0, ptr %.zero.addr, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr) #6 ret void } @@ -542,16 +537,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 @@ -559,25 +554,25 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -587,19 +582,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.finished: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker_state_machine.is_active.check: ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*) +; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) @@ -611,7 +606,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -619,19 +614,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -641,18 +636,18 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.finished: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker_state_machine.is_active.check: ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*) +; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) @@ -664,7 +659,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -672,16 +667,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -689,24 +684,23 @@ common.ret: ; preds = %entry, %user_code.e ret void user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 0, i32* %.zero.addr, align 4 - store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 - call void @__omp_outlined__2(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 0, ptr %.zero.addr, align 4 + store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 + call void @__omp_outlined__2(ptr %.threadid_temp., ptr %.zero.addr) #6 + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[X_H2S]] to i8* -; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* -; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr +; AMDGPU-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]] ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -716,19 +710,17 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X_H2S]] to i32* -; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]] ; NVPTX-NEXT: br label [[FOR_COND:%.*]] ; NVPTX: for.cond: ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -738,20 +730,18 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[X_H2S]] to i8* -; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* -; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr +; AMDGPU-DISABLED-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED: for.cond: ; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -761,19 +751,17 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP1]], i64 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X_H2S]] to i32* -; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]] ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED: for.cond: ; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -783,17 +771,15 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP1]], i64 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) - %x_on_stack = bitcast i8* %x to i32* - call void @use(i32* nocapture %x_on_stack) #10 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) + call void @use(ptr nocapture %x) #10 br label %for.cond for.cond: ; preds = %for.body, %entry @@ -803,38 +789,37 @@ for.cond: ; preds = %for.body, %entry for.cond.cleanup: ; preds = %for.cond call void @spmd_amenable() #10 - call void @__kmpc_free_shared(i8* %x, i64 4) + call void @__kmpc_free_shared(ptr %x, i64 4) ret void for.body: ; preds = %for.cond - %0 = load i32, i32* %.global_tid., align 4, !tbaa !18 - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %1, i64 0) + %0 = load i32, ptr %.global_tid., align 4, !tbaa !18 + call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0) %inc = add nsw i32 %i.0, 1 br label %for.cond, !llvm.loop !25 } ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void @@ -851,9 +836,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -861,9 +846,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -871,9 +856,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -881,19 +866,19 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: ret void ; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 %1, i32* %.addr1, align 4, !tbaa !18 - store i32 0, i32* %.zero.addr, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #6 + %global_args = alloca ptr, align 8 + store i32 %1, ptr %.addr1, align 4, !tbaa !18 + store i32 0, ptr %.zero.addr, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #6 ret void } @@ -905,16 +890,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 @@ -922,25 +907,25 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -950,19 +935,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.finished: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker_state_machine.is_active.check: ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) @@ -974,7 +959,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -982,19 +967,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -1004,18 +989,18 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.finished: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker_state_machine.is_active.check: ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) @@ -1027,7 +1012,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1035,16 +1020,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1052,20 +1037,20 @@ common.ret: ; preds = %entry, %user_code.e ret void user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 0, i32* %.zero.addr, align 4 - store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 - call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 0, ptr %.zero.addr, align 4 + store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 + call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #6 + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -1075,18 +1060,16 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1) +; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 ; NVPTX-NEXT: br label [[FOR_COND:%.*]] ; NVPTX: for.cond: ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -1096,18 +1079,16 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1) +; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED: for.cond: ; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -1117,18 +1098,16 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1) +; AMDGPU-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED: for.cond: ; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -1138,18 +1117,15 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1) +; NVPTX-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [1 x i8*], align 8 - %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) - %x_on_stack = bitcast i8* %x to i32* + %captured_vars_addrs = alloca [1 x ptr], align 8 + %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) br label %for.cond for.cond: ; preds = %for.body, %entry @@ -1159,61 +1135,59 @@ for.cond: ; preds = %for.body, %entry for.cond.cleanup: ; preds = %for.cond call void @spmd_amenable() #10 - call void @__kmpc_free_shared(i8* %x, i64 4) + call void @__kmpc_free_shared(ptr %x, i64 4) ret void for.body: ; preds = %for.cond - %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0 - store i8* %x, i8** %0, align 8, !tbaa !26 - %1 = load i32, i32* %.global_tid., align 4, !tbaa !18 - %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 1) + store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26 + %0 = load i32, ptr %.global_tid., align 4, !tbaa !18 + call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 1) %inc = add nsw i32 %i.0, 1 br label %for.cond, !llvm.loop !28 } ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 { +define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; entry: - %0 = load i32, i32* %x, align 4, !tbaa !18 + %0 = load i32, ptr %x, align 4, !tbaa !18 %inc = add nsw i32 %0, 1 - store i32 %inc, i32* %x, align 4, !tbaa !18 + store i32 %inc, ptr %x, align 4, !tbaa !18 call void @unknown() #11 ret void } @@ -1225,12 +1199,11 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** -; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 +; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -1238,12 +1211,11 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** -; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 +; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -1251,12 +1223,11 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** -; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -1264,25 +1235,23 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** -; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: ret void ; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 %1, i32* %.addr1, align 4, !tbaa !18 - store i32 0, i32* %.zero.addr, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - %2 = load i8**, i8*** %global_args, align 8 - %3 = bitcast i8** %2 to i32** - %4 = load i32*, i32** %3, align 8, !tbaa !26 - call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr, i32* %4) #6 + %global_args = alloca ptr, align 8 + store i32 %1, ptr %.addr1, align 4, !tbaa !18 + store i32 0, ptr %.zero.addr, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + %2 = load ptr, ptr %global_args, align 8 + %3 = load ptr, ptr %2, align 8, !tbaa !26 + call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr, ptr %3) #6 ret void } @@ -1293,16 +1262,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 @@ -1310,25 +1279,25 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -1338,19 +1307,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.finished: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker_state_machine.is_active.check: ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) @@ -1362,7 +1331,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1370,19 +1339,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -1392,18 +1361,18 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.finished: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker_state_machine.is_active.check: ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) @@ -1415,7 +1384,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1423,16 +1392,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1440,33 +1409,32 @@ common.ret: ; preds = %entry, %user_code.e ret void user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 0, i32* %.zero.addr, align 4 - store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 - call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 0, ptr %.zero.addr, align 4 + store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 + call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #6 + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 -; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32* +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 ; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]] ; AMDGPU: region.check.tid: ; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block() ; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; AMDGPU: region.guarded: -; AMDGPU-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]] ; AMDGPU: region.guarded.end: ; AMDGPU-NEXT: br label [[REGION_BARRIER]] ; AMDGPU: region.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]] ; AMDGPU: region.exit: ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -1478,31 +1446,28 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1) +; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] +; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 -; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32* +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 ; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]] ; NVPTX: region.check.tid: ; NVPTX-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block() ; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; NVPTX: region.guarded: -; NVPTX-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]] ; NVPTX: region.guarded.end: ; NVPTX-NEXT: br label [[REGION_BARRIER]] ; NVPTX: region.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[REGION_EXIT:%.*]] ; NVPTX: region.exit: ; NVPTX-NEXT: br label [[FOR_COND:%.*]] @@ -1514,20 +1479,17 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1) +; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] +; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32* -; AMDGPU-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED: for.cond: ; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -1537,20 +1499,17 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1) +; AMDGPU-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32* -; NVPTX-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +; NVPTX-DISABLED-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED: for.cond: ; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -1560,19 +1519,16 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1) +; NVPTX-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] ; entry: - %captured_vars_addrs = alloca [1 x i8*], align 8 - %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) - %x_on_stack = bitcast i8* %x to i32* - store i32 42, i32* %x_on_stack, align 4, !tbaa !18 + %captured_vars_addrs = alloca [1 x ptr], align 8 + %x = call align 4 ptr @__kmpc_alloc_shared(i64 4) + store i32 42, ptr %x, align 4, !tbaa !18 br label %for.cond for.cond: ; preds = %for.body, %entry @@ -1582,62 +1538,60 @@ for.cond: ; preds = %for.body, %entry for.cond.cleanup: ; preds = %for.cond call void @spmd_amenable() #10 - call void @__kmpc_free_shared(i8* %x, i64 4) + call void @__kmpc_free_shared(ptr %x, i64 4) ret void for.body: ; preds = %for.cond - %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0 - store i8* %x, i8** %0, align 8, !tbaa !26 - %1 = load i32, i32* %.global_tid., align 4, !tbaa !18 - %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 1) + store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26 + %0 = load i32, ptr %.global_tid., align 4, !tbaa !18 + call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 1) %inc = add nsw i32 %i.0, 1 br label %for.cond, !llvm.loop !29 } ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 { +define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @unknowni32p(i32* [[X]]) #[[ATTR8]] +; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @unknowni32p(i32* [[X]]) #[[ATTR8]] +; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @unknowni32p(i32* [[X]]) #[[ATTR8]] +; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 -; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @unknowni32p(i32* [[X]]) #[[ATTR8]] +; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; entry: - %0 = load i32, i32* %x, align 4, !tbaa !18 + %0 = load i32, ptr %x, align 4, !tbaa !18 %inc = add nsw i32 %0, 1 - store i32 %inc, i32* %x, align 4, !tbaa !18 - call void @unknowni32p(i32* %x) #11 + store i32 %inc, ptr %x, align 4, !tbaa !18 + call void @unknowni32p(ptr %x) #11 ret void } @@ -1648,12 +1602,11 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** -; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 +; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1661,12 +1614,11 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** -; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 +; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1674,12 +1626,11 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** -; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1687,25 +1638,23 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** -; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: ret void ; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 %1, i32* %.addr1, align 4, !tbaa !18 - store i32 0, i32* %.zero.addr, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - %2 = load i8**, i8*** %global_args, align 8 - %3 = bitcast i8** %2 to i32** - %4 = load i32*, i32** %3, align 8, !tbaa !26 - call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr, i32* %4) #6 + %global_args = alloca ptr, align 8 + store i32 %1, ptr %.addr1, align 4, !tbaa !18 + store i32 0, ptr %.zero.addr, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + %2 = load ptr, ptr %global_args, align 8 + %3 = load ptr, ptr %2, align 8, !tbaa !26 + call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr, ptr %3) #6 ret void } @@ -1714,10 +1663,10 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1727,12 +1676,12 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU: worker_state_machine.finished: ; AMDGPU-NEXT: ret void @@ -1745,7 +1694,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1753,18 +1702,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 ; NVPTX-SAME: () #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -1774,11 +1723,11 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX: worker_state_machine.finished: ; NVPTX-NEXT: ret void @@ -1791,7 +1740,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1799,18 +1748,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -1820,12 +1769,12 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.finished: ; AMDGPU-DISABLED-NEXT: ret void @@ -1838,7 +1787,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1846,18 +1795,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -1867,11 +1816,11 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.finished: ; NVPTX-DISABLED-NEXT: ret void @@ -1884,7 +1833,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1892,15 +1841,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1908,36 +1857,36 @@ common.ret: ; preds = %entry, %user_code.e ret void user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - store i32 0, i32* %.zero.addr, align 4 - store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 - call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + store i32 0, ptr %.zero.addr, align 4 + store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18 + call void @__omp_outlined__8(ptr %.threadid_temp., ptr %.zero.addr) #6 + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void @@ -1952,9 +1901,9 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 ; AMDGPU-SAME: () #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1964,19 +1913,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU: worker_state_machine.finished: ; AMDGPU-NEXT: ret void ; AMDGPU: worker_state_machine.is_active.check: ; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU: worker_state_machine.parallel_region.check: -; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID ; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; AMDGPU: worker_state_machine.parallel_region.execute: ; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) @@ -1988,7 +1937,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1996,21 +1945,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]] -; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* -; AMDGPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] -; AMDGPU-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] +; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 ; NVPTX-SAME: () #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2020,18 +1967,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX: worker_state_machine.finished: ; NVPTX-NEXT: ret void ; NVPTX: worker_state_machine.is_active.check: ; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX: worker_state_machine.parallel_region.check: -; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID ; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; NVPTX: worker_state_machine.parallel_region.execute: ; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) @@ -2043,7 +1990,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2051,21 +1998,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]] -; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* -; NVPTX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] -; NVPTX-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] +; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) -; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -2075,19 +2020,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 -; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]]) +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 +; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.finished: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker_state_machine.is_active.check: ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) +; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) @@ -2099,7 +2044,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2107,21 +2052,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* -; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -2131,18 +2074,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.finished: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker_state_machine.is_active.check: ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) +; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) @@ -2154,7 +2097,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2162,18 +2105,16 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* -; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -2181,38 +2122,36 @@ common.ret: ; preds = %entry, %user_code.e ret void user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) - %2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 %1, i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) - %3 = bitcast i8* %2 to %struct.kmp_task_t_with_privates* - %4 = call i32 @__kmpc_omp_task(%struct.ident_t* @1, i32 %1, i8* %2) - %5 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__9_wrapper to i8*), i8** %5, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + %1 = call i32 @__kmpc_global_thread_num(ptr @1) + %2 = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %1, i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") + %3 = call i32 @__kmpc_omp_task(ptr @1, i32 %1, ptr %2) + call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs, i64 0) + call void @__kmpc_target_deinit(ptr @1, i8 1) br label %common.ret } ; Function Attrs: alwaysinline convergent nounwind -define internal void @.omp_outlined.(i32 %.global_tid., i32* noalias %.part_id., i8* noalias %.privates., void (i8*, ...)* noalias %.copy_fn., i8* %.task_t., %struct.anon* noalias %__context) #9 { +define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #9 { ; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined. -; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. -; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void @@ -2223,100 +2162,96 @@ entry: } ; Function Attrs: convergent norecurse nounwind -define internal i32 @"_omp_task_entry$"(i32 %0, %struct.kmp_task_t_with_privates* noalias %1) #3 { +define internal i32 @"_omp_task_entry$"(i32 %0, ptr noalias %1) #3 { entry: - %2 = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* %1, i32 0, i32 0 - %3 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 2 - %4 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 0 - %5 = load i8*, i8** %4, align 8, !tbaa !30 - %6 = bitcast i8* %5 to %struct.anon* - %7 = bitcast %struct.kmp_task_t_with_privates* %1 to i8* - call void @.omp_outlined.(i32 %0, i32* %3, i8* null, void (i8*, ...)* null, i8* %7, %struct.anon* %6) #6 + %2 = getelementptr inbounds %struct.kmp_task_t, ptr %1, i32 0, i32 2 + %3 = load ptr, ptr %1, align 8, !tbaa !30 + call void @.omp_outlined.(i32 %0, ptr %2, ptr null, ptr null, ptr %1, ptr %3) #6 ret i32 0 } ; Function Attrs: nounwind -declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #6 +declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) #6 ; Function Attrs: nounwind -declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #6 +declare i32 @__kmpc_omp_task(ptr, i32, ptr) #6 ; Function Attrs: nosync nounwind -declare void @__kmpc_free_shared(i8* nocapture, i64) #8 +declare void @__kmpc_free_shared(ptr nocapture, i64) #8 ; Function Attrs: nofree nosync nounwind -declare i8* @__kmpc_alloc_shared(i64) #7 +declare ptr @__kmpc_alloc_shared(i64) #7 ; Function Attrs: convergent -declare void @use(i32* nocapture) #5 +declare void @use(ptr nocapture) #5 ; Function Attrs: convergent declare void @unknown() #2 -declare void @unknowni32p(i32*) #2 +declare void @unknowni32p(ptr) #2 ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; AMDGPU-NEXT: ret i32 0 ; ; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; NVPTX-NEXT: ret i32 0 ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; AMDGPU-DISABLED-NEXT: ret i32 0 ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; NVPTX-DISABLED-NEXT: ret i32 0 ; ret i32 0 } -declare void @__kmpc_get_shared_variables(i8***) +declare void @__kmpc_get_shared_variables(ptr) ; Function Attrs: alwaysinline -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) #4 +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4 ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 ; Function Attrs: convergent declare void @spmd_amenable() #5 ; Function Attrs: nounwind -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #6 +declare i32 @__kmpc_global_thread_num(ptr) #6 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) ; Function Attrs: alwaysinline convergent norecurse nounwind -define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void @@ -2333,9 +2268,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper @@ -2343,9 +2278,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper @@ -2353,9 +2288,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper @@ -2363,19 +2298,19 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: ret void ; entry: %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i32 %1, i32* %.addr1, align 4, !tbaa !18 - store i32 0, i32* %.zero.addr, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__9(i32* %.addr1, i32* %.zero.addr) #6 + %global_args = alloca ptr, align 8 + store i32 %1, ptr %.addr1, align 4, !tbaa !18 + store i32 0, ptr %.zero.addr, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__9(ptr %.addr1, ptr %.zero.addr) #6 ret void } @@ -2405,12 +2340,12 @@ attributes #11 = { convergent } !3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} !4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} !5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -!6 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -!7 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -!8 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -!9 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -!10 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -!11 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} +!6 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} +!7 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +!8 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +!9 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +!10 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} +!11 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} !12 = !{i32 1, !"wchar_size", i32 4} !13 = !{i32 7, !"openmp", i32 50} !14 = !{i32 7, !"openmp-device", i32 50} @@ -2491,12 +2426,12 @@ attributes #11 = { convergent } ; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; AMDGPU: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; AMDGPU: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; AMDGPU: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; AMDGPU: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; AMDGPU: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} +; AMDGPU: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} +; AMDGPU: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} +; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} ; AMDGPU: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; AMDGPU: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} @@ -2525,12 +2460,12 @@ attributes #11 = { convergent } ; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; NVPTX: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; NVPTX: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; NVPTX: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; NVPTX: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; NVPTX: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} +; NVPTX: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} +; NVPTX: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} +; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} ; NVPTX: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; NVPTX: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} @@ -2559,12 +2494,12 @@ attributes #11 = { convergent } ; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} ; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} @@ -2593,12 +2528,12 @@ attributes #11 = { convergent } ; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} ; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} ; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} -; NVPTX-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} -; NVPTX-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} -; NVPTX-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} -; NVPTX-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} -; NVPTX-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} -; NVPTX-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} +; NVPTX-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} +; NVPTX-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} +; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} +; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} +; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} +; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} ; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll index 92cb0a861132fd..d550ce201293de 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -12,33 +12,33 @@ target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @__omp_offloading_fd02_404433c2_main_l5_exec_mode = weak constant i8 1 -@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" ; Function Attrs: alwaysinline convergent norecurse nounwind ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 -; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" -; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" +; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. -define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { +define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5 -; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR3]] ; CHECK-NEXT: [[CALL_I:%.*]] = call double @__nv_sin(double 0x400921FB54442D18) #[[ATTR7:[0-9]+]] ; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]] ; CHECK: region.check.tid: @@ -46,22 +46,21 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: -; CHECK-NEXT: store double [[CALL_I]], double* [[X]], align 8, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA8:![0-9]+]] ; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull [[TMP4]], i64 0) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR3]] +; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 0) #[[ATTR3]] +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR3]] ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -69,21 +68,20 @@ common.ret: ; preds = %entry, %user_code.e ret void user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @1) %call.i = call double @__nv_sin(double 0x400921FB54442D18) #6 - store double %call.i, double* %x, align 8, !tbaa !8 - %2 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0 - call void @__kmpc_parallel_51(%struct.ident_t* nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull %2, i64 0) #3 - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1) #3 + store double %call.i, ptr %x, align 8, !tbaa !8 + call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3 + call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3 br label %common.ret } -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr +declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr ; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn -define internal void @__omp_outlined__(i32* noalias nocapture %.global_tid., i32* noalias nocapture %.bound_tid.) #1 { +define internal void @__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid.) #1 { ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -; CHECK-SAME: (i32* noalias nocapture [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (ptr noalias nocapture [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; @@ -96,25 +94,25 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 { ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** nonnull [[GLOBAL_ARGS]]) #[[ATTR3]] +; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; entry: - %global_args = alloca i8**, align 8 - call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3 + %global_args = alloca ptr, align 8 + call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3 ret void } -declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr +declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr ; Function Attrs: nounwind -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3 +declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 ; Function Attrs: alwaysinline -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr #4 +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr #4 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr +declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr ; Function Attrs: convergent declare double @__nv_sin(double) local_unnamed_addr #5 @@ -133,7 +131,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } !llvm.ident = !{!7} !0 = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0} -!1 = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1} +!1 = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} @@ -155,7 +153,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } ; CHECK: attributes #[[ATTR7]] = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0} -; CHECK: [[META1:![0-9]+]] = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1} +; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1} ; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index 68805e39190d0b..8d8b333a49a541 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -34,59 +34,59 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode = weak constant i8 1 -@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" ; Function Attrs: convergent norecurse nounwind ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 -; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" -; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" +; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" +; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" ; CHECK-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. -define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) #0 { +define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2a_fbfa7a_sequential_loop_l6 -; CHECK-SAME: (i32* [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (ptr [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8 ; CHECK-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB1]]) #[[ATTR6]] -; CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]] +; CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1 ; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32 ; CHECK-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32 -; CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM_I]] +; CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]] ; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]] ; CHECK: region.check.tid: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: -; CHECK-NEXT: store i32 0, i32* [[X]], align 4, !noalias !8 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1_I]], align 4, !noalias !8 -; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], i32* [[ARRAYIDX2_I]], align 4, !noalias !8 +; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias !8 +; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias !8 +; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias !8 ; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: -; CHECK-NEXT: call void @usei8ptr(i8* nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]] +; CHECK-NEXT: call void @usei8ptr(ptr nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]] ; CHECK-NEXT: br label [[FOR_COND_I:%.*]] ; CHECK: for.cond.i: ; CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 2, [[REGION_EXIT]] ], [ [[INC_I:%.*]], [[REGION_EXIT3:%.*]] ] @@ -96,91 +96,91 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK: for.body.i: ; CHECK-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1 ; CHECK-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64 -; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM4_I]] +; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]] ; CHECK-NEXT: br label [[REGION_CHECK_TID5:%.*]] ; CHECK: region.check.tid5: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]] ; CHECK: region.guarded4: -; CHECK-NEXT: store i32 [[SUB3_I]], i32* [[ARRAYIDX5_I]], align 4, !noalias !8 +; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias !8 ; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]] ; CHECK: region.guarded.end1: ; CHECK-NEXT: br label [[REGION_BARRIER2]] ; CHECK: region.barrier2: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP4]]) ; CHECK-NEXT: br label [[REGION_EXIT3]] ; CHECK: region.exit3: ; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 ; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: __omp_outlined__.exit: -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** null, i64 0) -; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias !8 +; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr null, i64 0) +; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias !8 ; CHECK-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64 -; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM6_I]] +; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]] ; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]] ; CHECK: region.check.tid10: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]] ; CHECK: region.guarded9: -; CHECK-NEXT: store i32 [[CALL_I]], i32* [[ARRAYIDX7_I]], align 4, !noalias !8 +; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias !8 ; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]] ; CHECK: region.guarded.end6: ; CHECK-NEXT: br label [[REGION_BARRIER7]] ; CHECK: region.barrier7: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]]) ; CHECK-NEXT: br label [[REGION_EXIT8:%.*]] ; CHECK: region.exit8: -; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64 -; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM9_I]] +; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]] ; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]] ; CHECK: region.check.tid15: ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]] ; CHECK: region.guarded14: -; CHECK-NEXT: store i32 [[CALL8_I]], i32* [[ARRAYIDX10_I]], align 4, !noalias !8 +; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias !8 ; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]] ; CHECK: region.guarded.end11: ; CHECK-NEXT: br label [[REGION_BARRIER12]] ; CHECK: region.barrier12: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]]) ; CHECK-NEXT: br label [[REGION_EXIT13:%.*]] ; CHECK: region.exit13: -; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64 -; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM12_I]] +; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]] ; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]] ; CHECK: region.check.tid20: ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 ; CHECK-NEXT: br i1 [[TMP11]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]] ; CHECK: region.guarded19: -; CHECK-NEXT: store i32 [[CALL11_I]], i32* [[ARRAYIDX13_I]], align 4, !noalias !8 +; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias !8 ; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]] ; CHECK: region.guarded.end16: ; CHECK-NEXT: br label [[REGION_BARRIER17]] ; CHECK: region.barrier17: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]]) ; CHECK-NEXT: br label [[REGION_EXIT18:%.*]] ; CHECK: region.exit18: -; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR6]] +; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR6]] ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2a_fbfa7a_sequential_loop_l6 -; CHECK-DISABLED-SAME: (i32* [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-DISABLED-SAME: (ptr [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8 -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]] ; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: is_worker_check: @@ -190,11 +190,11 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: -; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.finished: ; CHECK-DISABLED-NEXT: ret void @@ -211,21 +211,21 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; CHECK-DISABLED: worker_state_machine.done.barrier: -; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; CHECK-DISABLED: thread.user_code.check: ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB1]]) #[[ATTR6]] -; CHECK-DISABLED-NEXT: store i32 0, i32* [[X]], align 4, !noalias !8 -; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 -; CHECK-DISABLED-NEXT: store i32 1, i32* [[ARRAYIDX1_I]], align 4, !noalias !8 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]] +; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias !8 +; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1 +; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias !8 ; CHECK-DISABLED-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32 ; CHECK-DISABLED-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32 -; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM_I]] -; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], i32* [[ARRAYIDX2_I]], align 4, !noalias !8 -; CHECK-DISABLED-NEXT: call void @usei8ptr(i8* nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]] +; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]] +; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias !8 +; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]] ; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]] ; CHECK-DISABLED: for.cond.i: ; CHECK-DISABLED-NEXT: [[I_0_I:%.*]] = phi i32 [ 2, [[USER_CODE_ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] @@ -235,50 +235,50 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK-DISABLED: for.body.i: ; CHECK-DISABLED-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1 ; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64 -; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM4_I]] -; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], i32* [[ARRAYIDX5_I]], align 4, !noalias !8 +; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]] +; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias !8 ; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 ; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-DISABLED: __omp_outlined__.exit: -; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** null, i64 0) -; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias !8 +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr null, i64 0) +; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias !8 ; CHECK-DISABLED-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64 -; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM6_I]] -; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], i32* [[ARRAYIDX7_I]], align 4, !noalias !8 -; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]] +; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias !8 +; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-DISABLED-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64 -; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM9_I]] -; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], i32* [[ARRAYIDX10_I]], align 4, !noalias !8 -; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]] +; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias !8 +; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-DISABLED-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64 -; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM12_I]] -; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], i32* [[ARRAYIDX13_I]], align 4, !noalias !8 -; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 1) #[[ATTR6]] +; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]] +; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias !8 +; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8 +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 1) #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; entry: %N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3 + %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) - store i32 0, i32* %x, align 4, !noalias !8 - %arrayidx1.i = getelementptr inbounds i32, i32* %x, i64 1 - store i32 1, i32* %arrayidx1.i, align 4, !noalias !8 + %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @1) + store i32 0, ptr %x, align 4, !noalias !8 + %arrayidx1.i = getelementptr inbounds i32, ptr %x, i64 1 + store i32 1, ptr %arrayidx1.i, align 4, !noalias !8 %sext = shl i64 %N, 32 %idxprom.i = ashr exact i64 %sext, 32 - %arrayidx2.i = getelementptr inbounds i32, i32* %x, i64 %idxprom.i - store i32 %N.addr.sroa.0.0.extract.trunc, i32* %arrayidx2.i, align 4, !noalias !8 - %heap2stack = call align 8 i8* @__kmpc_alloc_shared(i64 8) - call void @usei8ptr(i8* nocapture %heap2stack) willreturn nounwind - call void @__kmpc_free_shared(i8* %heap2stack, i64 8) + %arrayidx2.i = getelementptr inbounds i32, ptr %x, i64 %idxprom.i + store i32 %N.addr.sroa.0.0.extract.trunc, ptr %arrayidx2.i, align 4, !noalias !8 + %heap2stack = call align 8 ptr @__kmpc_alloc_shared(i64 8) + call void @usei8ptr(ptr nocapture %heap2stack) willreturn nounwind + call void @__kmpc_free_shared(ptr %heap2stack, i64 8) br label %for.cond.i for.cond.i: ; preds = %for.body.i, %user_code.entry @@ -290,42 +290,42 @@ for.cond.i: ; preds = %for.body.i, %user_c for.body.i: ; preds = %for.cond.i %sub3.i = add nsw i32 %i.0.i, -1 %idxprom4.i = zext i32 %i.0.i to i64 - %arrayidx5.i = getelementptr inbounds i32, i32* %x, i64 %idxprom4.i - store i32 %sub3.i, i32* %arrayidx5.i, align 4, !noalias !8 + %arrayidx5.i = getelementptr inbounds i32, ptr %x, i64 %idxprom4.i + store i32 %sub3.i, ptr %arrayidx5.i, align 4, !noalias !8 %inc.i = add nuw nsw i32 %i.0.i, 1 br label %for.cond.i, !llvm.loop !11 __omp_outlined__.exit: ; preds = %for.cond.i - call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** null, i64 0) - %call.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8 + call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr null, i64 0) + %call.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8 %idxprom6.i = sext i32 %call.i to i64 - %arrayidx7.i = getelementptr inbounds i32, i32* %x, i64 %idxprom6.i - store i32 %call.i, i32* %arrayidx7.i, align 4, !noalias !8 - %call8.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8 + %arrayidx7.i = getelementptr inbounds i32, ptr %x, i64 %idxprom6.i + store i32 %call.i, ptr %arrayidx7.i, align 4, !noalias !8 + %call8.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8 %idxprom9.i = sext i32 %call8.i to i64 - %arrayidx10.i = getelementptr inbounds i32, i32* %x, i64 %idxprom9.i - store i32 %call8.i, i32* %arrayidx10.i, align 4, !noalias !8 - %call11.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8 + %arrayidx10.i = getelementptr inbounds i32, ptr %x, i64 %idxprom9.i + store i32 %call8.i, ptr %arrayidx10.i, align 4, !noalias !8 + %call11.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8 %idxprom12.i = sext i32 %call11.i to i64 - %arrayidx13.i = getelementptr inbounds i32, i32* %x, i64 %idxprom12.i - store i32 %call11.i, i32* %arrayidx13.i, align 4, !noalias !8 - %call14.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8 - %call15.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8 - %call16.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8 - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1) #3 + %arrayidx13.i = getelementptr inbounds i32, ptr %x, i64 %idxprom12.i + store i32 %call11.i, ptr %arrayidx13.i, align 4, !noalias !8 + %call14.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8 + %call15.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8 + %call16.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8 + call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3 ret void worker.exit: ; preds = %entry ret void } -define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { +define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 -; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) { +; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; CHECK-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) { +; CHECK-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) { ; CHECK-DISABLED-NEXT: ret void ; ret void @@ -342,35 +342,35 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) { ret void } -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-DISABLED-NEXT: ret i32 0 ; ret i32 0 } ; Function Attrs: convergent -declare i32 @no_openmp(i32*) #1 +declare i32 @no_openmp(ptr) #1 ; Function Attrs: convergent nounwind readonly willreturn declare void @pure() #2 -declare i8* @__kmpc_alloc_shared(i64) -declare void @__kmpc_free_shared(i8* nocapture, i64) -declare void @usei8ptr(i8*) #1 +declare ptr @__kmpc_alloc_shared(i64) +declare void @__kmpc_free_shared(ptr nocapture, i64) +declare void @usei8ptr(ptr) #1 ; Function Attrs: nounwind -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 +declare i32 @__kmpc_global_thread_num(ptr) #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn declare void @llvm.experimental.noalias.scope.decl(metadata) #4 @@ -388,7 +388,7 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame !llvm.ident = !{!7} !0 = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0} -!1 = !{void (i32*, i64)* @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1} +!1 = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1} !2 = !{i32 1, !"wchar_size", i32 4} !3 = !{i32 7, !"openmp", i32 50} !4 = !{i32 7, !"openmp-device", i32 50} @@ -426,7 +426,7 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame ; CHECK-DISABLED: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0} -; CHECK: [[META1:![0-9]+]] = !{void (i32*, i64)* @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1} +; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1} ; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} @@ -440,7 +440,7 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame ; CHECK: [[META12:![0-9]+]] = !{!"llvm.loop.mustprogress"} ;. ; CHECK-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0} -; CHECK-DISABLED: [[META1:![0-9]+]] = !{void (i32*, i64)* @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1} +; CHECK-DISABLED: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1} ; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll index 42ba49cfb70f99..c65f8d8c8179a4 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll @@ -29,45 +29,45 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @__omp_offloading_2b_10393b5_spmd_l12_exec_mode = weak constant i8 1 @__omp_offloading_2b_10393b5_generic_l20_exec_mode = weak constant i8 1 -@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @G = external global i32, align 4 -@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, i8* @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" ;. ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 -; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x i8*] [i8* @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, i8* @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" -; CHECK: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" +; CHECK: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. ; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 ; CHECK-DISABLE-SPMDIZATION: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 -; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x i8*] [i8* @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, i8* @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" +; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata" ; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OUTLINED___WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef ;. define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -75,8 +75,8 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 +; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLE-SPMDIZATION: is_worker_check: @@ -86,11 +86,11 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] ; CHECK-DISABLE-SPMDIZATION: worker_state_machine.begin: -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]]) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr +; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] ; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished: ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void @@ -107,26 +107,26 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 { ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_kernel_end_parallel() ; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; CHECK-DISABLE-SPMDIZATION: worker_state_machine.done.barrier: -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]]) ; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; CHECK-DISABLE-SPMDIZATION: thread.user_code.check: ; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]] -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry call void @spmd_helper() #5 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -134,26 +134,26 @@ worker.exit: ; preds = %entry } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-NEXT: ret i32 0 ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-DISABLE-SPMDIZATION-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { ; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0 ; ret i32 0 } -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) +declare void @__kmpc_target_deinit(ptr, i8) ; Function Attrs: convergent noinline norecurse nounwind define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -164,7 +164,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: call void @generic_helper() #[[ATTR5]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -172,24 +172,24 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) ; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR5]] -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry call void @generic_helper() #5 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) + call void @__kmpc_target_deinit(ptr @1, i8 1) ret void worker.exit: ; preds = %entry @@ -201,55 +201,52 @@ define internal void @spmd_helper() #1 { ; CHECK-LABEL: define {{[^@]+}}@spmd_helper ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; CHECK-NEXT: call void @leaf() #[[ATTR5]] -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; CHECK-NEXT: ret void ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@spmd_helper ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR5]] -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR2:[0-9]+]] -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* @__omp_outlined___wrapper.ID, i8** [[TMP1]], i64 0) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]] +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 + %captured_vars_addrs = alloca [0 x ptr], align 8 call void @leaf() #5 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** - call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0) + %0 = call i32 @__kmpc_global_thread_num(ptr @2) + call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0) ret void } ; Function Attrs: convergent noinline norecurse nounwind -define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__ -; CHECK-DISABLE-SPMDIZATION-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLE-SPMDIZATION-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %.global_tid..addr = alloca i32*, align 8 - %.bound_tid..addr = alloca i32*, align 8 - store i32* %.global_tid., i32** %.global_tid..addr, align 8 - store i32* %.bound_tid., i32** %.bound_tid..addr, align 8 + %.global_tid..addr = alloca ptr, align 8 + %.bound_tid..addr = alloca ptr, align 8 + store ptr %.global_tid., ptr %.global_tid..addr, align 8 + store ptr %.bound_tid., ptr %.bound_tid..addr, align 8 call void @unknown() #5 ret void } @@ -262,9 +259,9 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 { ; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined___wrapper @@ -273,31 +270,31 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 { ; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 ; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 ; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-DISABLE-SPMDIZATION-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +; CHECK-DISABLE-SPMDIZATION-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: %.addr = alloca i16, align 2 %.addr1 = alloca i32, align 4 %.zero.addr = alloca i32, align 4 - %global_args = alloca i8**, align 8 - store i16 %0, i16* %.addr, align 2 - store i32 %1, i32* %.addr1, align 4 - store i32 0, i32* %.zero.addr, align 4 - call void @__kmpc_get_shared_variables(i8*** %global_args) - call void @__omp_outlined__(i32* %.addr1, i32* %.zero.addr) #3 + %global_args = alloca ptr, align 8 + store i16 %0, ptr %.addr, align 2 + store i32 %1, ptr %.addr1, align 4 + store i32 0, ptr %.zero.addr, align 4 + call void @__kmpc_get_shared_variables(ptr %global_args) + call void @__omp_outlined__(ptr %.addr1, ptr %.zero.addr) #3 ret void } -declare void @__kmpc_get_shared_variables(i8***) +declare void @__kmpc_get_shared_variables(ptr) ; Function Attrs: nounwind -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 +declare i32 @__kmpc_global_thread_num(ptr) #3 ; Function Attrs: alwaysinline -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) #4 +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4 ; Function Attrs: convergent noinline nounwind define internal void @leaf() #1 { @@ -310,12 +307,12 @@ define internal void @leaf() #1 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: -; CHECK-NEXT: store i32 42, i32* @G, align 4 +; CHECK-NEXT: store i32 42, ptr @G, align 4 ; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB3]], i32 [[TMP0]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: ret void @@ -323,11 +320,11 @@ define internal void @leaf() #1 { ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@leaf ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR4:[0-9]+]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: store i32 42, i32* @G, align 4 +; CHECK-DISABLE-SPMDIZATION-NEXT: store i32 42, ptr @G, align 4 ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - store i32 42, i32* @G, align 4 + store i32 42, ptr @G, align 4 ret void } @@ -366,8 +363,8 @@ attributes #5 = { convergent } !0 = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} !1 = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} -!2 = !{void ()* @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} -!3 = !{void ()* @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} +!2 = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} +!3 = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} @@ -393,8 +390,8 @@ attributes #5 = { convergent } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} -; CHECK: [[META2:![0-9]+]] = !{void ()* @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} -; CHECK: [[META3:![0-9]+]] = !{void ()* @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} +; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} +; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} @@ -404,8 +401,8 @@ attributes #5 = { convergent } ;. ; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} ; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} -; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{void ()* @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} -; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{void ()* @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} +; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1} +; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1} ; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll index 6164519e0b2f1d..ff5ce93f66b98e 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll @@ -37,32 +37,32 @@ target triple = "nvptx64" ;; } ;; } -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @0 = private unnamed_addr constant [103 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1 -@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @2 = private unnamed_addr constant [72 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_fallback;11;1;;\00", align 1 -@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([72 x i8], [72 x i8]* @2, i32 0, i32 0) }, align 8 +@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8 @4 = private unnamed_addr constant [104 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1 -@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([104 x i8], [104 x i8]* @4, i32 0, i32 0) }, align 8 +@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8 @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1 @6 = private unnamed_addr constant [106 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1 -@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([106 x i8], [106 x i8]* @6, i32 0, i32 0) }, align 8 +@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8 @8 = private unnamed_addr constant [75 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_no_fallback;20;1;;\00", align 1 -@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([75 x i8], [75 x i8]* @8, i32 0, i32 0) }, align 8 +@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8 @10 = private unnamed_addr constant [107 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1 -@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([107 x i8], [107 x i8]* @10, i32 0, i32 0) }, align 8 +@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8 @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1 @12 = private unnamed_addr constant [63 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;known;4;1;;\00", align 1 -@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([63 x i8], [63 x i8]* @12, i32 0, i32 0) }, align 8 +@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8 @G = external global i32 -@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, i8* @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata" +@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata" ; Function Attrs: convergent norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 { entry: - %captured_vars_addrs.i.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18 + %captured_vars_addrs.i.i = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18 %exec_user_code = icmp eq i32 %0, -1, !dbg !18 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18 @@ -70,20 +70,18 @@ common.ret: ; preds = %entry, %user_code.e ret void, !dbg !19 user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @3) #3 + %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @3) #3 call void @unknown() #6, !dbg !20 - %2 = bitcast [0 x i8*]* %captured_vars_addrs.i.i to i8* - call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3 - %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i.i, i64 0, i64 0, !dbg !23 - call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23 - call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26 + call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3 + %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 + call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23 + call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26 call void @unknown() #6, !dbg !27 - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28 + call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28 br label %common.ret } -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +define weak i32 @__kmpc_target_init(ptr, i8, i1) { ret i32 0 } @@ -94,23 +92,22 @@ declare void @unknown() local_unnamed_addr #1 ; Function Attrs: nounwind define hidden void @known() local_unnamed_addr #2 !dbg !29 { entry: - %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @13) - %1 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0, !dbg !30 - call void @__kmpc_parallel_51(%struct.ident_t* nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** nonnull %1, i64 0) #3, !dbg !30 + %captured_vars_addrs = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @13) + call void @__kmpc_parallel_51(ptr nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3, !dbg !30 ret void, !dbg !31 } ; Function Attrs: nounwind -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3 +declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr +declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr ; Function Attrs: norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 { entry: - %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33 + %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8 + %0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33 %exec_user_code = icmp eq i32 %0, -1, !dbg !33 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33 @@ -118,28 +115,26 @@ common.ret: ; preds = %entry, %user_code.e ret void, !dbg !34 user_code.entry: ; preds = %entry - %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @9) #3 - %2 = bitcast [0 x i8*]* %captured_vars_addrs.i2.i to i8* - call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3 - %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3 - %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i2.i, i64 0, i64 0, !dbg !35 - call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !35 - call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !39 - call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3 - %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3 - call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %5, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !40 - call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !42 - call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3 - %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3 - call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43 - call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45 + %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @9) #3 + call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3 + %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 + call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !35 + call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !39 + call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3 + %3 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 + call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !40 + call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !42 + call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3 + %4 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 + call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43 + call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45 call void @spmd_amenable() - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46 + call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46 br label %common.ret } ; Function Attrs: convergent norecurse nounwind -define internal void @__omp_outlined__2(i32* noalias nocapture nofree readnone %.global_tid., i32* noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 { +define internal void @__omp_outlined__2(ptr noalias nocapture nofree readnone %.global_tid., ptr noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 { entry: call void @unknown() #6, !dbg !48 ret void, !dbg !49 @@ -148,21 +143,21 @@ entry: ; Function Attrs: convergent norecurse nounwind define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 { entry: - %global_args = alloca i8**, align 8 - call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3, !dbg !51 + %global_args = alloca ptr, align 8 + call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3, !dbg !51 call void @unknown() #6, !dbg !52 ret void, !dbg !51 } -declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr +declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr +declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr ; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5 ; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5 declare void @spmd_amenable() #7 @@ -186,8 +181,8 @@ attributes #7 = { "llvm.assume"="ompx_spmd_amenable" } !2 = !{} !3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1} !4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0} -!5 = !{void ()* @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1} -!6 = !{void ()* @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1} +!5 = !{ptr @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1} +!6 = !{ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1} !7 = !{i32 7, !"Dwarf Version", i32 2} !8 = !{i32 2, !"Debug Info Version", i32 3} !9 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll index add432de2e34a4..7a45eb5e8be019 100644 --- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll +++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll @@ -4,7 +4,7 @@ target triple = "amdgcn-amd-amdhsa" -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @G = internal addrspace(3) global i32 undef, align 4 @@ -16,84 +16,84 @@ define void @kernel() "kernel" { ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@kernel ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { -; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false) +; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false) ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 ; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TUNIT: if.then: -; TUNIT-NEXT: store i32 1, i32 addrspace(3)* @G, align 4 +; TUNIT-NEXT: store i32 1, ptr addrspace(3) @G, align 4 ; TUNIT-NEXT: br label [[IF_MERGE:%.*]] ; TUNIT: if.else: ; TUNIT-NEXT: call void @barrier() #[[ATTR4:[0-9]+]] -; TUNIT-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4 +; TUNIT-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4 ; TUNIT-NEXT: call void @use1(i32 [[L]]) #[[ATTR4]] ; TUNIT-NEXT: br label [[IF_MERGE]] ; TUNIT: if.merge: ; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]] ; TUNIT: if.then2: -; TUNIT-NEXT: store i32 2, i32 addrspace(3)* @G, align 4 +; TUNIT-NEXT: store i32 2, ptr addrspace(3) @G, align 4 ; TUNIT-NEXT: call void @barrier() #[[ATTR4]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: -; TUNIT-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1) +; TUNIT-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1) ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: norecurse ; CGSCC-LABEL: define {{[^@]+}}@kernel ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false) +; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false) ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 ; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CGSCC: if.then: -; CGSCC-NEXT: store i32 1, i32 addrspace(3)* @G, align 4 +; CGSCC-NEXT: store i32 1, ptr addrspace(3) @G, align 4 ; CGSCC-NEXT: br label [[IF_MERGE:%.*]] ; CGSCC: if.else: ; CGSCC-NEXT: call void @barrier() -; CGSCC-NEXT: [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4 +; CGSCC-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4 ; CGSCC-NEXT: call void @use1(i32 [[L]]) ; CGSCC-NEXT: br label [[IF_MERGE]] ; CGSCC: if.merge: ; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]] ; CGSCC: if.then2: -; CGSCC-NEXT: store i32 2, i32 addrspace(3)* @G, align 4 +; CGSCC-NEXT: store i32 2, ptr addrspace(3) @G, align 4 ; CGSCC-NEXT: call void @barrier() ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: -; CGSCC-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1) +; CGSCC-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1) ; CGSCC-NEXT: ret void ; - %call = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false) + %call = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false) %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %if.then, label %if.else if.then: - store i32 1, i32 addrspace(3)* @G + store i32 1, ptr addrspace(3) @G br label %if.merge if.else: call void @barrier(); - %l = load i32, i32 addrspace(3)* @G + %l = load i32, ptr addrspace(3) @G call void @use1(i32 %l) br label %if.merge if.merge: br i1 %cmp, label %if.then2, label %if.end if.then2: - store i32 2, i32 addrspace(3)* @G + store i32 2, ptr addrspace(3) @G call void @barrier(); br label %if.end if.end: - call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1) + call void @__kmpc_target_deinit(ptr undef, i8 1) ret void } declare void @barrier() norecurse nounwind nocallback declare void @use1(i32) nosync norecurse nounwind nocallback -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) nocallback -declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback +declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback +declare void @__kmpc_target_deinit(ptr, i8) nocallback !llvm.module.flags = !{!0, !1} !nvvm.annotations = !{!2} !0 = !{i32 7, !"openmp", i32 50} !1 = !{i32 7, !"openmp-device", i32 50} -!2 = !{void ()* @kernel, !"kernel", i32 1} +!2 = !{ptr @kernel, !"kernel", i32 1} ;. ; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" } @@ -109,7 +109,7 @@ declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} -; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1} +; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1} ;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK: {{.*}} diff --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll index 2f317fb30e864f..14e563cef6680c 100644 --- a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll +++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll @@ -6,68 +6,61 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 @.__omp_offloading_heavyComputation.region_id = weak constant i8 0 @.offload_maptypes. = private unnamed_addr constant [2 x i64] [i64 35, i64 35] -%struct.ident_t = type { i32, i32, i32, i32, i8* } +%struct.ident_t = type { i32, i32, i32, i32, ptr } @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8 ; CHECK-LABEL: {{[^@]+}}Successfully got offload values: -; CHECK-NEXT: offload_baseptrs: double* %a --- %size.addr = alloca i32, align 4 --- -; CHECK-NEXT: offload_ptrs: double* %a --- %size.addr = alloca i32, align 4 --- +; CHECK-NEXT: offload_baseptrs: ptr %a --- %size.addr = alloca i32, align 4 --- +; CHECK-NEXT: offload_ptrs: ptr %a --- %size.addr = alloca i32, align 4 --- ; CHECK-NEXT: offload_sizes: %0 = shl nuw nsw i64 %conv, 3 --- i64 4 --- -;int heavyComputation(double* a, unsigned size) { +;int heavyComputation(ptr a, unsigned size) { ; int random = rand() % 7; ; ; //#pragma omp target data map(a[0:size], size) -; void* args[2]; +; ptr args[2]; ; args[0] = &a; ; args[1] = &size; ; __tgt_target_data_begin(..., args, ...) ; ; #pragma omp target teams ; for (int i = 0; i < size; ++i) { -; a[i] = ++a[i] * 3.141624; +; a[i] = ++aptr 3.141624; ; } ; ; return random; ;} -define dso_local i32 @heavyComputation(double* %a, i32 %size) { +define dso_local i32 @heavyComputation(ptr %a, i32 %size) { entry: %size.addr = alloca i32, align 4 - %.offload_baseptrs = alloca [2 x i8*], align 8 - %.offload_ptrs = alloca [2 x i8*], align 8 + %.offload_baseptrs = alloca [2 x ptr], align 8 + %.offload_ptrs = alloca [2 x ptr], align 8 %.offload_sizes = alloca [2 x i64], align 8 - store i32 %size, i32* %size.addr, align 4 + store i32 %size, ptr %size.addr, align 4 %call = tail call i32 (...) @rand() %conv = zext i32 %size to i64 %0 = shl nuw nsw i64 %conv, 3 - %1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0 - %2 = bitcast [2 x i8*]* %.offload_baseptrs to double** - store double* %a, double** %2, align 8 - %3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0 - %4 = bitcast [2 x i8*]* %.offload_ptrs to double** - store double* %a, double** %4, align 8 - %5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0 - store i64 %0, i64* %5, align 8 - %6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1 - %7 = bitcast i8** %6 to i32** - store i32* %size.addr, i32** %7, align 8 - %8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1 - %9 = bitcast i8** %8 to i32** - store i32* %size.addr, i32** %9, align 8 - %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 - store i64 4, i64* %10, align 8 - call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes., i64 0, i64 0), i8** null, i8** null) + store ptr %a, ptr %.offload_baseptrs, align 8 + store ptr %a, ptr %.offload_ptrs, align 8 + store i64 %0, ptr %.offload_sizes, align 8 + %1 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i64 0, i64 1 + store ptr %size.addr, ptr %1, align 8 + %2 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i64 0, i64 1 + store ptr %size.addr, ptr %2, align 8 + %3 = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i64 0, i64 1 + store i64 4, ptr %3, align 8 + call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes., ptr null, ptr null) %rem = srem i32 %call, 7 - call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes., i64 0, i64 0), i8** null, i8** null) + call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes., ptr null, ptr null) ret i32 %rem } -declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) -declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) +declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) +declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) declare dso_local i32 @rand(...)