diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll
index c529408366d84c..741cfba14f31d5 100644
--- a/llvm/test/Transforms/OpenMP/add_attributes.ll
+++ b/llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -2,306 +2,306 @@
 ; RUN: opt < %s -S -passes=openmp-opt-cgscc -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
-%struct.omp_lock_t = type { i8* }
-%struct.omp_nest_lock_t = type { i8* }
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.omp_lock_t = type { ptr }
+%struct.omp_nest_lock_t = type { ptr }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
-define void @call_all(i32 %schedule, %struct.omp_lock_t* %lock, i32 %lock_hint, %struct.omp_nest_lock_t* %nest_lock, i32 %i, i8* %s, i64 %st, i8* %vp, double %d, i32 %proc_bind, i64 %allocator_handle, i8* %cp, i64 %event_handle, i32 %pause_resource) {
+define void @call_all(i32 %schedule, ptr %lock, i32 %lock_hint, ptr %nest_lock, i32 %i, ptr %s, i64 %st, ptr %vp, double %d, i32 %proc_bind, i64 %allocator_handle, ptr %cp, i64 %event_handle, i32 %pause_resource) {
 entry:
   %schedule.addr = alloca i32, align 4
-  %lock.addr = alloca %struct.omp_lock_t*, align 8
+  %lock.addr = alloca ptr, align 8
   %lock_hint.addr = alloca i32, align 4
-  %nest_lock.addr = alloca %struct.omp_nest_lock_t*, align 8
+  %nest_lock.addr = alloca ptr, align 8
   %i.addr = alloca i32, align 4
-  %s.addr = alloca i8*, align 8
+  %s.addr = alloca ptr, align 8
   %st.addr = alloca i64, align 8
-  %vp.addr = alloca i8*, align 8
+  %vp.addr = alloca ptr, align 8
   %d.addr = alloca double, align 8
   %proc_bind.addr = alloca i32, align 4
   %allocator_handle.addr = alloca i64, align 8
-  %cp.addr = alloca i8*, align 8
+  %cp.addr = alloca ptr, align 8
   %event_handle.addr = alloca i64, align 8
   %pause_resource.addr = alloca i32, align 4
-  store i32 %schedule, i32* %schedule.addr, align 4
-  store %struct.omp_lock_t* %lock, %struct.omp_lock_t** %lock.addr, align 8
-  store i32 %lock_hint, i32* %lock_hint.addr, align 4
-  store %struct.omp_nest_lock_t* %nest_lock, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
-  store i32 %i, i32* %i.addr, align 4
-  store i8* %s, i8** %s.addr, align 8
-  store i64 %st, i64* %st.addr, align 8
-  store i8* %vp, i8** %vp.addr, align 8
-  store double %d, double* %d.addr, align 8
-  store i32 %proc_bind, i32* %proc_bind.addr, align 4
-  store i64 %allocator_handle, i64* %allocator_handle.addr, align 8
-  store i8* %cp, i8** %cp.addr, align 8
-  store i64 %event_handle, i64* %event_handle.addr, align 8
-  store i32 %pause_resource, i32* %pause_resource.addr, align 4
+  store i32 %schedule, ptr %schedule.addr, align 4
+  store ptr %lock, ptr %lock.addr, align 8
+  store i32 %lock_hint, ptr %lock_hint.addr, align 4
+  store ptr %nest_lock, ptr %nest_lock.addr, align 8
+  store i32 %i, ptr %i.addr, align 4
+  store ptr %s, ptr %s.addr, align 8
+  store i64 %st, ptr %st.addr, align 8
+  store ptr %vp, ptr %vp.addr, align 8
+  store double %d, ptr %d.addr, align 8
+  store i32 %proc_bind, ptr %proc_bind.addr, align 4
+  store i64 %allocator_handle, ptr %allocator_handle.addr, align 8
+  store ptr %cp, ptr %cp.addr, align 8
+  store i64 %event_handle, ptr %event_handle.addr, align 8
+  store i32 %pause_resource, ptr %pause_resource.addr, align 4
   call void @omp_set_num_threads(i32 0)
   call void @omp_set_dynamic(i32 0)
   call void @omp_set_nested(i32 0)
   call void @omp_set_max_active_levels(i32 0)
-  %0 = load i32, i32* %schedule.addr, align 4
+  %0 = load i32, ptr %schedule.addr, align 4
   call void @omp_set_schedule(i32 %0, i32 0)
   %call = call i32 @omp_get_num_threads()
-  store i32 %call, i32* %i.addr, align 4
-  %1 = load i32, i32* %i.addr, align 4
+  store i32 %call, ptr %i.addr, align 4
+  %1 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %1)
   %call1 = call i32 @omp_get_dynamic()
-  store i32 %call1, i32* %i.addr, align 4
-  %2 = load i32, i32* %i.addr, align 4
+  store i32 %call1, ptr %i.addr, align 4
+  %2 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %2)
   %call2 = call i32 @omp_get_nested()
-  store i32 %call2, i32* %i.addr, align 4
-  %3 = load i32, i32* %i.addr, align 4
+  store i32 %call2, ptr %i.addr, align 4
+  %3 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %3)
   %call3 = call i32 @omp_get_max_threads()
-  store i32 %call3, i32* %i.addr, align 4
-  %4 = load i32, i32* %i.addr, align 4
+  store i32 %call3, ptr %i.addr, align 4
+  %4 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %4)
   %call4 = call i32 @omp_get_thread_num()
-  store i32 %call4, i32* %i.addr, align 4
-  %5 = load i32, i32* %i.addr, align 4
+  store i32 %call4, ptr %i.addr, align 4
+  %5 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %5)
   %call5 = call i32 @omp_get_num_procs()
-  store i32 %call5, i32* %i.addr, align 4
-  %6 = load i32, i32* %i.addr, align 4
+  store i32 %call5, ptr %i.addr, align 4
+  %6 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %6)
   %call6 = call i32 @omp_in_parallel()
-  store i32 %call6, i32* %i.addr, align 4
-  %7 = load i32, i32* %i.addr, align 4
+  store i32 %call6, ptr %i.addr, align 4
+  %7 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %7)
   %call7 = call i32 @omp_in_final()
-  store i32 %call7, i32* %i.addr, align 4
-  %8 = load i32, i32* %i.addr, align 4
+  store i32 %call7, ptr %i.addr, align 4
+  %8 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %8)
   %call8 = call i32 @omp_get_active_level()
-  store i32 %call8, i32* %i.addr, align 4
-  %9 = load i32, i32* %i.addr, align 4
+  store i32 %call8, ptr %i.addr, align 4
+  %9 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %9)
   %call9 = call i32 @omp_get_level()
-  store i32 %call9, i32* %i.addr, align 4
-  %10 = load i32, i32* %i.addr, align 4
+  store i32 %call9, ptr %i.addr, align 4
+  %10 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %10)
   %call10 = call i32 @omp_get_ancestor_thread_num(i32 0)
-  store i32 %call10, i32* %i.addr, align 4
-  %11 = load i32, i32* %i.addr, align 4
+  store i32 %call10, ptr %i.addr, align 4
+  %11 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %11)
   %call11 = call i32 @omp_get_team_size(i32 0)
-  store i32 %call11, i32* %i.addr, align 4
-  %12 = load i32, i32* %i.addr, align 4
+  store i32 %call11, ptr %i.addr, align 4
+  %12 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %12)
   %call12 = call i32 @omp_get_thread_limit()
-  store i32 %call12, i32* %i.addr, align 4
-  %13 = load i32, i32* %i.addr, align 4
+  store i32 %call12, ptr %i.addr, align 4
+  %13 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %13)
   %call13 = call i32 @omp_get_max_active_levels()
-  store i32 %call13, i32* %i.addr, align 4
-  %14 = load i32, i32* %i.addr, align 4
+  store i32 %call13, ptr %i.addr, align 4
+  %14 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %14)
-  call void @omp_get_schedule(i32* %schedule.addr, i32* %i.addr)
+  call void @omp_get_schedule(ptr %schedule.addr, ptr %i.addr)
   %call14 = call i32 @omp_get_max_task_priority()
-  store i32 %call14, i32* %i.addr, align 4
-  %15 = load i32, i32* %i.addr, align 4
+  store i32 %call14, ptr %i.addr, align 4
+  %15 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %15)
-  %16 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
-  call void @omp_init_lock(%struct.omp_lock_t* %16)
-  %17 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
-  call void @omp_set_lock(%struct.omp_lock_t* %17)
-  %18 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
-  call void @omp_unset_lock(%struct.omp_lock_t* %18)
-  %19 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
-  call void @omp_destroy_lock(%struct.omp_lock_t* %19)
-  %20 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
-  %call15 = call i32 @omp_test_lock(%struct.omp_lock_t* %20)
-  store i32 %call15, i32* %i.addr, align 4
-  %21 = load i32, i32* %i.addr, align 4
+  %16 = load ptr, ptr %lock.addr, align 8
+  call void @omp_init_lock(ptr %16)
+  %17 = load ptr, ptr %lock.addr, align 8
+  call void @omp_set_lock(ptr %17)
+  %18 = load ptr, ptr %lock.addr, align 8
+  call void @omp_unset_lock(ptr %18)
+  %19 = load ptr, ptr %lock.addr, align 8
+  call void @omp_destroy_lock(ptr %19)
+  %20 = load ptr, ptr %lock.addr, align 8
+  %call15 = call i32 @omp_test_lock(ptr %20)
+  store i32 %call15, ptr %i.addr, align 4
+  %21 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %21)
-  %22 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
-  call void @omp_init_nest_lock(%struct.omp_nest_lock_t* %22)
-  %23 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
-  call void @omp_set_nest_lock(%struct.omp_nest_lock_t* %23)
-  %24 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
-  call void @omp_unset_nest_lock(%struct.omp_nest_lock_t* %24)
-  %25 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
-  call void @omp_destroy_nest_lock(%struct.omp_nest_lock_t* %25)
-  %26 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
-  %call16 = call i32 @omp_test_nest_lock(%struct.omp_nest_lock_t* %26)
-  store i32 %call16, i32* %i.addr, align 4
-  %27 = load i32, i32* %i.addr, align 4
+  %22 = load ptr, ptr %nest_lock.addr, align 8
+  call void @omp_init_nest_lock(ptr %22)
+  %23 = load ptr, ptr %nest_lock.addr, align 8
+  call void @omp_set_nest_lock(ptr %23)
+  %24 = load ptr, ptr %nest_lock.addr, align 8
+  call void @omp_unset_nest_lock(ptr %24)
+  %25 = load ptr, ptr %nest_lock.addr, align 8
+  call void @omp_destroy_nest_lock(ptr %25)
+  %26 = load ptr, ptr %nest_lock.addr, align 8
+  %call16 = call i32 @omp_test_nest_lock(ptr %26)
+  store i32 %call16, ptr %i.addr, align 4
+  %27 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %27)
-  %28 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
-  %29 = load i32, i32* %lock_hint.addr, align 4
-  call void @omp_init_lock_with_hint(%struct.omp_lock_t* %28, i32 %29)
-  %30 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
-  %31 = load i32, i32* %lock_hint.addr, align 4
-  call void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t* %30, i32 %31)
+  %28 = load ptr, ptr %lock.addr, align 8
+  %29 = load i32, ptr %lock_hint.addr, align 4
+  call void @omp_init_lock_with_hint(ptr %28, i32 %29)
+  %30 = load ptr, ptr %nest_lock.addr, align 8
+  %31 = load i32, ptr %lock_hint.addr, align 4
+  call void @omp_init_nest_lock_with_hint(ptr %30, i32 %31)
   %call17 = call double @omp_get_wtime()
-  store double %call17, double* %d.addr, align 8
-  %32 = load double, double* %d.addr, align 8
+  store double %call17, ptr %d.addr, align 8
+  %32 = load double, ptr %d.addr, align 8
   call void @use_double(double %32)
   %call18 = call double @omp_get_wtick()
-  store double %call18, double* %d.addr, align 8
-  %33 = load double, double* %d.addr, align 8
+  store double %call18, ptr %d.addr, align 8
+  %33 = load double, ptr %d.addr, align 8
   call void @use_double(double %33)
   %call19 = call i32 @omp_get_default_device()
-  store i32 %call19, i32* %i.addr, align 4
-  %34 = load i32, i32* %i.addr, align 4
+  store i32 %call19, ptr %i.addr, align 4
+  %34 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %34)
   call void @omp_set_default_device(i32 0)
   %call20 = call i32 @omp_is_initial_device()
-  store i32 %call20, i32* %i.addr, align 4
-  %35 = load i32, i32* %i.addr, align 4
+  store i32 %call20, ptr %i.addr, align 4
+  %35 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %35)
   %call21 = call i32 @omp_get_num_devices()
-  store i32 %call21, i32* %i.addr, align 4
-  %36 = load i32, i32* %i.addr, align 4
+  store i32 %call21, ptr %i.addr, align 4
+  %36 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %36)
   %call22 = call i32 @omp_get_num_teams()
-  store i32 %call22, i32* %i.addr, align 4
-  %37 = load i32, i32* %i.addr, align 4
+  store i32 %call22, ptr %i.addr, align 4
+  %37 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %37)
   %call23 = call i32 @omp_get_team_num()
-  store i32 %call23, i32* %i.addr, align 4
-  %38 = load i32, i32* %i.addr, align 4
+  store i32 %call23, ptr %i.addr, align 4
+  %38 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %38)
   %call24 = call i32 @omp_get_cancellation()
-  store i32 %call24, i32* %i.addr, align 4
-  %39 = load i32, i32* %i.addr, align 4
+  store i32 %call24, ptr %i.addr, align 4
+  %39 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %39)
   %call25 = call i32 @omp_get_initial_device()
-  store i32 %call25, i32* %i.addr, align 4
-  %40 = load i32, i32* %i.addr, align 4
+  store i32 %call25, ptr %i.addr, align 4
+  %40 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %40)
-  %41 = load i64, i64* %st.addr, align 8
-  %42 = load i32, i32* %i.addr, align 4
-  %call26 = call i8* @omp_target_alloc(i64 %41, i32 %42)
-  store i8* %call26, i8** %vp.addr, align 8
-  %43 = load i8*, i8** %vp.addr, align 8
-  call void @use_voidptr(i8* %43)
-  %44 = load i8*, i8** %vp.addr, align 8
-  %45 = load i32, i32* %i.addr, align 4
-  call void @omp_target_free(i8* %44, i32 %45)
-  %46 = load i8*, i8** %vp.addr, align 8
-  %47 = load i32, i32* %i.addr, align 4
-  %call27 = call i32 @omp_target_is_present(i8* %46, i32 %47)
-  store i32 %call27, i32* %i.addr, align 4
-  %48 = load i32, i32* %i.addr, align 4
+  %41 = load i64, ptr %st.addr, align 8
+  %42 = load i32, ptr %i.addr, align 4
+  %call26 = call ptr @omp_target_alloc(i64 %41, i32 %42)
+  store ptr %call26, ptr %vp.addr, align 8
+  %43 = load ptr, ptr %vp.addr, align 8
+  call void @use_voidptr(ptr %43)
+  %44 = load ptr, ptr %vp.addr, align 8
+  %45 = load i32, ptr %i.addr, align 4
+  call void @omp_target_free(ptr %44, i32 %45)
+  %46 = load ptr, ptr %vp.addr, align 8
+  %47 = load i32, ptr %i.addr, align 4
+  %call27 = call i32 @omp_target_is_present(ptr %46, i32 %47)
+  store i32 %call27, ptr %i.addr, align 4
+  %48 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %48)
-  %49 = load i8*, i8** %vp.addr, align 8
-  %50 = load i8*, i8** %vp.addr, align 8
-  %51 = load i64, i64* %st.addr, align 8
-  %52 = load i64, i64* %st.addr, align 8
-  %53 = load i64, i64* %st.addr, align 8
-  %54 = load i32, i32* %i.addr, align 4
-  %55 = load i32, i32* %i.addr, align 4
-  %call28 = call i32 @omp_target_memcpy(i8* %49, i8* %50, i64 %51, i64 %52, i64 %53, i32 %54, i32 %55)
-  store i32 %call28, i32* %i.addr, align 4
-  %56 = load i32, i32* %i.addr, align 4
+  %49 = load ptr, ptr %vp.addr, align 8
+  %50 = load ptr, ptr %vp.addr, align 8
+  %51 = load i64, ptr %st.addr, align 8
+  %52 = load i64, ptr %st.addr, align 8
+  %53 = load i64, ptr %st.addr, align 8
+  %54 = load i32, ptr %i.addr, align 4
+  %55 = load i32, ptr %i.addr, align 4
+  %call28 = call i32 @omp_target_memcpy(ptr %49, ptr %50, i64 %51, i64 %52, i64 %53, i32 %54, i32 %55)
+  store i32 %call28, ptr %i.addr, align 4
+  %56 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %56)
-  %57 = load i8*, i8** %vp.addr, align 8
-  %58 = load i8*, i8** %vp.addr, align 8
-  %59 = load i64, i64* %st.addr, align 8
-  %60 = load i64, i64* %st.addr, align 8
-  %61 = load i32, i32* %i.addr, align 4
-  %call29 = call i32 @omp_target_associate_ptr(i8* %57, i8* %58, i64 %59, i64 %60, i32 %61)
-  store i32 %call29, i32* %i.addr, align 4
-  %62 = load i32, i32* %i.addr, align 4
+  %57 = load ptr, ptr %vp.addr, align 8
+  %58 = load ptr, ptr %vp.addr, align 8
+  %59 = load i64, ptr %st.addr, align 8
+  %60 = load i64, ptr %st.addr, align 8
+  %61 = load i32, ptr %i.addr, align 4
+  %call29 = call i32 @omp_target_associate_ptr(ptr %57, ptr %58, i64 %59, i64 %60, i32 %61)
+  store i32 %call29, ptr %i.addr, align 4
+  %62 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %62)
-  %63 = load i8*, i8** %vp.addr, align 8
-  %64 = load i32, i32* %i.addr, align 4
-  %call30 = call i32 @omp_target_disassociate_ptr(i8* %63, i32 %64)
-  store i32 %call30, i32* %i.addr, align 4
-  %65 = load i32, i32* %i.addr, align 4
+  %63 = load ptr, ptr %vp.addr, align 8
+  %64 = load i32, ptr %i.addr, align 4
+  %call30 = call i32 @omp_target_disassociate_ptr(ptr %63, i32 %64)
+  store i32 %call30, ptr %i.addr, align 4
+  %65 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %65)
   %call31 = call i32 @omp_get_device_num()
-  store i32 %call31, i32* %i.addr, align 4
-  %66 = load i32, i32* %i.addr, align 4
+  store i32 %call31, ptr %i.addr, align 4
+  %66 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %66)
   %call32 = call i32 @omp_get_proc_bind()
-  store i32 %call32, i32* %proc_bind.addr, align 4
+  store i32 %call32, ptr %proc_bind.addr, align 4
   %call33 = call i32 @omp_get_num_places()
-  store i32 %call33, i32* %i.addr, align 4
-  %67 = load i32, i32* %i.addr, align 4
+  store i32 %call33, ptr %i.addr, align 4
+  %67 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %67)
   %call34 = call i32 @omp_get_place_num_procs(i32 0)
-  store i32 %call34, i32* %i.addr, align 4
-  %68 = load i32, i32* %i.addr, align 4
+  store i32 %call34, ptr %i.addr, align 4
+  %68 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %68)
-  %69 = load i32, i32* %i.addr, align 4
-  call void @omp_get_place_proc_ids(i32 %69, i32* %i.addr)
+  %69 = load i32, ptr %i.addr, align 4
+  call void @omp_get_place_proc_ids(i32 %69, ptr %i.addr)
   %call35 = call i32 @omp_get_place_num()
-  store i32 %call35, i32* %i.addr, align 4
-  %70 = load i32, i32* %i.addr, align 4
+  store i32 %call35, ptr %i.addr, align 4
+  %70 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %70)
   %call36 = call i32 @omp_get_partition_num_places()
-  store i32 %call36, i32* %i.addr, align 4
-  %71 = load i32, i32* %i.addr, align 4
+  store i32 %call36, ptr %i.addr, align 4
+  %71 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %71)
-  call void @omp_get_partition_place_nums(i32* %i.addr)
-  %72 = load i32, i32* %i.addr, align 4
-  %73 = load i32, i32* %i.addr, align 4
-  %74 = load i8*, i8** %vp.addr, align 8
-  %call37 = call i32 @omp_control_tool(i32 %72, i32 %73, i8* %74)
-  store i32 %call37, i32* %i.addr, align 4
-  %75 = load i32, i32* %i.addr, align 4
+  call void @omp_get_partition_place_nums(ptr %i.addr)
+  %72 = load i32, ptr %i.addr, align 4
+  %73 = load i32, ptr %i.addr, align 4
+  %74 = load ptr, ptr %vp.addr, align 8
+  %call37 = call i32 @omp_control_tool(i32 %72, i32 %73, ptr %74)
+  store i32 %call37, ptr %i.addr, align 4
+  %75 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %75)
-  %76 = load i64, i64* %allocator_handle.addr, align 8
+  %76 = load i64, ptr %allocator_handle.addr, align 8
   call void @omp_destroy_allocator(i64 %76)
-  %77 = load i64, i64* %allocator_handle.addr, align 8
+  %77 = load i64, ptr %allocator_handle.addr, align 8
   call void @omp_set_default_allocator(i64 %77)
   %call38 = call i64 @omp_get_default_allocator()
-  store i64 %call38, i64* %allocator_handle.addr, align 8
-  %78 = load i64, i64* %st.addr, align 8
-  %79 = load i64, i64* %allocator_handle.addr, align 8
-  %call39 = call i8* @omp_alloc(i64 %78, i64 %79)
-  store i8* %call39, i8** %vp.addr, align 8
-  %80 = load i8*, i8** %vp.addr, align 8
-  call void @use_voidptr(i8* %80)
-  %81 = load i8*, i8** %vp.addr, align 8
-  %82 = load i64, i64* %allocator_handle.addr, align 8
-  call void @omp_free(i8* %81, i64 %82)
-  %83 = load i64, i64* %st.addr, align 8
-  %84 = load i64, i64* %allocator_handle.addr, align 8
-  %call40 = call i8* @omp_alloc(i64 %83, i64 %84)
-  store i8* %call40, i8** %vp.addr, align 8
-  %85 = load i8*, i8** %vp.addr, align 8
-  call void @use_voidptr(i8* %85)
-  %86 = load i8*, i8** %vp.addr, align 8
-  %87 = load i64, i64* %allocator_handle.addr, align 8
-  call void @omp_free(i8* %86, i64 %87)
-  %88 = load i8*, i8** %s.addr, align 8
-  call void @ompc_set_affinity_format(i8* %88)
-  %89 = load i8*, i8** %cp.addr, align 8
-  %90 = load i64, i64* %st.addr, align 8
-  %call41 = call i64 @ompc_get_affinity_format(i8* %89, i64 %90)
-  store i64 %call41, i64* %st.addr, align 8
-  %91 = load i64, i64* %st.addr, align 8
+  store i64 %call38, ptr %allocator_handle.addr, align 8
+  %78 = load i64, ptr %st.addr, align 8
+  %79 = load i64, ptr %allocator_handle.addr, align 8
+  %call39 = call ptr @omp_alloc(i64 %78, i64 %79)
+  store ptr %call39, ptr %vp.addr, align 8
+  %80 = load ptr, ptr %vp.addr, align 8
+  call void @use_voidptr(ptr %80)
+  %81 = load ptr, ptr %vp.addr, align 8
+  %82 = load i64, ptr %allocator_handle.addr, align 8
+  call void @omp_free(ptr %81, i64 %82)
+  %83 = load i64, ptr %st.addr, align 8
+  %84 = load i64, ptr %allocator_handle.addr, align 8
+  %call40 = call ptr @omp_alloc(i64 %83, i64 %84)
+  store ptr %call40, ptr %vp.addr, align 8
+  %85 = load ptr, ptr %vp.addr, align 8
+  call void @use_voidptr(ptr %85)
+  %86 = load ptr, ptr %vp.addr, align 8
+  %87 = load i64, ptr %allocator_handle.addr, align 8
+  call void @omp_free(ptr %86, i64 %87)
+  %88 = load ptr, ptr %s.addr, align 8
+  call void @ompc_set_affinity_format(ptr %88)
+  %89 = load ptr, ptr %cp.addr, align 8
+  %90 = load i64, ptr %st.addr, align 8
+  %call41 = call i64 @ompc_get_affinity_format(ptr %89, i64 %90)
+  store i64 %call41, ptr %st.addr, align 8
+  %91 = load i64, ptr %st.addr, align 8
   call void @use_sizet(i64 %91)
-  %92 = load i8*, i8** %s.addr, align 8
-  call void @ompc_display_affinity(i8* %92)
-  %93 = load i8*, i8** %cp.addr, align 8
-  %94 = load i64, i64* %st.addr, align 8
-  %95 = load i8*, i8** %s.addr, align 8
-  %call42 = call i64 @ompc_capture_affinity(i8* %93, i64 %94, i8* %95)
-  store i64 %call42, i64* %st.addr, align 8
-  %96 = load i64, i64* %st.addr, align 8
+  %92 = load ptr, ptr %s.addr, align 8
+  call void @ompc_display_affinity(ptr %92)
+  %93 = load ptr, ptr %cp.addr, align 8
+  %94 = load i64, ptr %st.addr, align 8
+  %95 = load ptr, ptr %s.addr, align 8
+  %call42 = call i64 @ompc_capture_affinity(ptr %93, i64 %94, ptr %95)
+  store i64 %call42, ptr %st.addr, align 8
+  %96 = load i64, ptr %st.addr, align 8
   call void @use_sizet(i64 %96)
-  %97 = load i64, i64* %event_handle.addr, align 8
+  %97 = load i64, ptr %event_handle.addr, align 8
   call void @omp_fulfill_event(i64 %97)
-  %98 = load i32, i32* %pause_resource.addr, align 4
-  %99 = load i32, i32* %i.addr, align 4
+  %98 = load i32, ptr %pause_resource.addr, align 4
+  %99 = load i32, ptr %i.addr, align 4
   %call43 = call i32 @omp_pause_resource(i32 %98, i32 %99)
-  store i32 %call43, i32* %i.addr, align 4
-  %100 = load i32, i32* %i.addr, align 4
+  store i32 %call43, ptr %i.addr, align 4
+  %100 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %100)
-  %101 = load i32, i32* %pause_resource.addr, align 4
+  %101 = load i32, ptr %pause_resource.addr, align 4
   %call44 = call i32 @omp_pause_resource_all(i32 %101)
-  store i32 %call44, i32* %i.addr, align 4
-  %102 = load i32, i32* %i.addr, align 4
+  store i32 %call44, ptr %i.addr, align 4
+  %102 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %102)
   %call45 = call i32 @omp_get_supported_active_levels()
-  store i32 %call45, i32* %i.addr, align 4
-  %103 = load i32, i32* %i.addr, align 4
+  store i32 %call45, ptr %i.addr, align 4
+  %103 = load i32, ptr %i.addr, align 4
   call void @use_int(i32 %103)
   ret void
 }
@@ -346,33 +346,33 @@ declare dso_local i32 @omp_get_thread_limit()
 
 declare dso_local i32 @omp_get_max_active_levels()
 
-declare dso_local void @omp_get_schedule(i32*, i32*)
+declare dso_local void @omp_get_schedule(ptr, ptr)
 
 declare dso_local i32 @omp_get_max_task_priority()
 
-declare dso_local void @omp_init_lock(%struct.omp_lock_t*)
+declare dso_local void @omp_init_lock(ptr)
 
-declare dso_local void @omp_set_lock(%struct.omp_lock_t*)
+declare dso_local void @omp_set_lock(ptr)
 
-declare dso_local void @omp_unset_lock(%struct.omp_lock_t*)
+declare dso_local void @omp_unset_lock(ptr)
 
-declare dso_local void @omp_destroy_lock(%struct.omp_lock_t*)
+declare dso_local void @omp_destroy_lock(ptr)
 
-declare dso_local i32 @omp_test_lock(%struct.omp_lock_t*)
+declare dso_local i32 @omp_test_lock(ptr)
 
-declare dso_local void @omp_init_nest_lock(%struct.omp_nest_lock_t*)
+declare dso_local void @omp_init_nest_lock(ptr)
 
-declare dso_local void @omp_set_nest_lock(%struct.omp_nest_lock_t*)
+declare dso_local void @omp_set_nest_lock(ptr)
 
-declare dso_local void @omp_unset_nest_lock(%struct.omp_nest_lock_t*)
+declare dso_local void @omp_unset_nest_lock(ptr)
 
-declare dso_local void @omp_destroy_nest_lock(%struct.omp_nest_lock_t*)
+declare dso_local void @omp_destroy_nest_lock(ptr)
 
-declare dso_local i32 @omp_test_nest_lock(%struct.omp_nest_lock_t*)
+declare dso_local i32 @omp_test_nest_lock(ptr)
 
-declare dso_local void @omp_init_lock_with_hint(%struct.omp_lock_t*, i32)
+declare dso_local void @omp_init_lock_with_hint(ptr, i32)
 
-declare dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32)
+declare dso_local void @omp_init_nest_lock_with_hint(ptr, i32)
 
 declare dso_local double @omp_get_wtime()
 
@@ -396,19 +396,19 @@ declare dso_local i32 @omp_get_cancellation()
 
 declare dso_local i32 @omp_get_initial_device()
 
-declare dso_local i8* @omp_target_alloc(i64, i32)
+declare dso_local ptr @omp_target_alloc(i64, i32)
 
-declare dso_local void @use_voidptr(i8*)
+declare dso_local void @use_voidptr(ptr)
 
-declare dso_local void @omp_target_free(i8*, i32)
+declare dso_local void @omp_target_free(ptr, i32)
 
-declare dso_local i32 @omp_target_is_present(i8*, i32)
+declare dso_local i32 @omp_target_is_present(ptr, i32)
 
-declare dso_local i32 @omp_target_memcpy(i8*, i8*, i64, i64, i64, i32, i32)
+declare dso_local i32 @omp_target_memcpy(ptr, ptr, i64, i64, i64, i32, i32)
 
-declare dso_local i32 @omp_target_associate_ptr(i8*, i8*, i64, i64, i32)
+declare dso_local i32 @omp_target_associate_ptr(ptr, ptr, i64, i64, i32)
 
-declare dso_local i32 @omp_target_disassociate_ptr(i8*, i32)
+declare dso_local i32 @omp_target_disassociate_ptr(ptr, i32)
 
 declare dso_local i32 @omp_get_device_num()
 
@@ -418,15 +418,15 @@ declare dso_local i32 @omp_get_num_places()
 
 declare dso_local i32 @omp_get_place_num_procs(i32)
 
-declare dso_local void @omp_get_place_proc_ids(i32, i32*)
+declare dso_local void @omp_get_place_proc_ids(i32, ptr)
 
 declare dso_local i32 @omp_get_place_num()
 
 declare dso_local i32 @omp_get_partition_num_places()
 
-declare dso_local void @omp_get_partition_place_nums(i32*)
+declare dso_local void @omp_get_partition_place_nums(ptr)
 
-declare dso_local i32 @omp_control_tool(i32, i32, i8*)
+declare dso_local i32 @omp_control_tool(i32, i32, ptr)
 
 declare dso_local void @omp_destroy_allocator(i64)
 
@@ -434,19 +434,19 @@ declare dso_local void @omp_set_default_allocator(i64)
 
 declare dso_local i64 @omp_get_default_allocator()
 
-declare dso_local i8* @omp_alloc(i64, i64)
+declare dso_local ptr @omp_alloc(i64, i64)
 
-declare dso_local void @omp_free(i8*, i64)
+declare dso_local void @omp_free(ptr, i64)
 
-declare dso_local void @ompc_set_affinity_format(i8*)
+declare dso_local void @ompc_set_affinity_format(ptr)
 
-declare dso_local i64 @ompc_get_affinity_format(i8*, i64)
+declare dso_local i64 @ompc_get_affinity_format(ptr, i64)
 
 declare dso_local void @use_sizet(i64)
 
-declare dso_local void @ompc_display_affinity(i8*)
+declare dso_local void @ompc_display_affinity(ptr)
 
-declare dso_local i64 @ompc_capture_affinity(i8*, i64, i8*)
+declare dso_local i64 @ompc_capture_affinity(ptr, i64, ptr)
 
 declare dso_local void @omp_fulfill_event(i64)
 
@@ -456,216 +456,216 @@ declare dso_local i32 @omp_pause_resource_all(i32)
 
 declare dso_local i32 @omp_get_supported_active_levels()
 
-declare void @__kmpc_barrier(%struct.ident_t*, i32)
+declare void @__kmpc_barrier(ptr, i32)
 
-declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32)
+declare i32 @__kmpc_cancel(ptr, i32, i32)
 
-declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32)
+declare i32 @__kmpc_cancel_barrier(ptr, i32)
 
-declare void @__kmpc_flush(%struct.ident_t*)
+declare void @__kmpc_flush(ptr)
 
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+declare i32 @__kmpc_global_thread_num(ptr)
 
-declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+declare void @__kmpc_fork_call(ptr, i32, ptr, ...)
 
-declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32)
+declare i32 @__kmpc_omp_taskwait(ptr, i32)
 
-declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32)
+declare i32 @__kmpc_omp_taskyield(ptr, i32, i32)
 
-declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32)
+declare void @__kmpc_push_num_threads(ptr, i32, i32)
 
-declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32)
+declare void @__kmpc_push_proc_bind(ptr, i32, i32)
 
-declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32)
+declare void @__kmpc_serialized_parallel(ptr, i32)
 
-declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32)
+declare void @__kmpc_end_serialized_parallel(ptr, i32)
 
-declare i32 @__kmpc_master(%struct.ident_t*, i32)
+declare i32 @__kmpc_master(ptr, i32)
 
-declare void @__kmpc_end_master(%struct.ident_t*, i32)
+declare void @__kmpc_end_master(ptr, i32)
 
-declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*)
+declare void @__kmpc_critical(ptr, i32, ptr)
 
-declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32)
+declare void @__kmpc_critical_with_hint(ptr, i32, ptr, i32)
 
-declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*)
+declare void @__kmpc_end_critical(ptr, i32, ptr)
 
-declare void @__kmpc_begin(%struct.ident_t*, i32)
+declare void @__kmpc_begin(ptr, i32)
 
-declare void @__kmpc_end(%struct.ident_t*)
+declare void @__kmpc_end(ptr)
 
-declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
+declare i32 @__kmpc_reduce(ptr, i32, i32, i64, ptr, ptr, ptr)
 
-declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
+declare i32 @__kmpc_reduce_nowait(ptr, i32, i32, i64, ptr, ptr, ptr)
 
-declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*)
+declare void @__kmpc_end_reduce(ptr, i32, ptr)
 
-declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*)
+declare void @__kmpc_end_reduce_nowait(ptr, i32, ptr)
 
-declare void @__kmpc_ordered(%struct.ident_t*, i32)
+declare void @__kmpc_ordered(ptr, i32)
 
-declare void @__kmpc_end_ordered(%struct.ident_t*, i32)
+declare void @__kmpc_end_ordered(ptr, i32)
 
-declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
+declare void @__kmpc_for_static_init_4(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32)
 
-declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
+declare void @__kmpc_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32)
 
-declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
+declare void @__kmpc_for_static_init_8(ptr, i32, i32, ptr, ptr, ptr, ptr, i64, i64)
 
-declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
+declare void @__kmpc_for_static_init_8u(ptr, i32, i32, ptr, ptr, ptr, ptr, i64, i64)
 
-declare void @__kmpc_for_static_fini(%struct.ident_t*, i32)
+declare void @__kmpc_for_static_fini(ptr, i32)
 
-declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
+declare void @__kmpc_team_static_init_4(ptr, i32, ptr, ptr, ptr, ptr, i32, i32)
 
-declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
+declare void @__kmpc_team_static_init_4u(ptr, i32, ptr, ptr, ptr, ptr, i32, i32)
 
-declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
+declare void @__kmpc_team_static_init_8(ptr, i32, ptr, ptr, ptr, ptr, i64, i64)
 
-declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
+declare void @__kmpc_team_static_init_8u(ptr, i32, ptr, ptr, ptr, ptr, i64, i64)
 
-declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
+declare void @__kmpc_dist_for_static_init_4(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32)
 
-declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
+declare void @__kmpc_dist_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32)
 
-declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
+declare void @__kmpc_dist_for_static_init_8(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i64, i64)
 
-declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
+declare void @__kmpc_dist_for_static_init_8u(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i64, i64)
 
-declare i32 @__kmpc_single(%struct.ident_t*, i32)
+declare i32 @__kmpc_single(ptr, i32)
 
-declare void @__kmpc_end_single(%struct.ident_t*, i32)
+declare void @__kmpc_end_single(ptr, i32)
 
-declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*)
+declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr)
 
-declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*)
+declare i32 @__kmpc_omp_task(ptr, i32, ptr)
 
-declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32)
+declare void @__kmpc_end_taskgroup(ptr, i32)
 
-declare void @__kmpc_taskgroup(%struct.ident_t*, i32)
+declare void @__kmpc_taskgroup(ptr, i32)
 
-declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
+declare void @__kmpc_dist_dispatch_init_4(ptr, i32, i32, ptr, i32, i32, i32, i32)
 
-declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
+declare void @__kmpc_dist_dispatch_init_4u(ptr, i32, i32, ptr, i32, i32, i32, i32)
 
-declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
+declare void @__kmpc_dist_dispatch_init_8(ptr, i32, i32, ptr, i64, i64, i64, i64)
 
-declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
+declare void @__kmpc_dist_dispatch_init_8u(ptr, i32, i32, ptr, i64, i64, i64, i64)
 
-declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
+declare void @__kmpc_dispatch_init_4(ptr, i32, i32, i32, i32, i32, i32)
 
-declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
+declare void @__kmpc_dispatch_init_4u(ptr, i32, i32, i32, i32, i32, i32)
 
-declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64)
+declare void @__kmpc_dispatch_init_8(ptr, i32, i32, i64, i64, i64, i64)
 
-declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64)
+declare void @__kmpc_dispatch_init_8u(ptr, i32, i32, i64, i64, i64, i64)
 
-declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*)
+declare i32 @__kmpc_dispatch_next_4(ptr, i32, ptr, ptr, ptr, ptr)
 
-declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*)
+declare i32 @__kmpc_dispatch_next_4u(ptr, i32, ptr, ptr, ptr, ptr)
 
-declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*)
+declare i32 @__kmpc_dispatch_next_8(ptr, i32, ptr, ptr, ptr, ptr)
 
-declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*)
+declare i32 @__kmpc_dispatch_next_8u(ptr, i32, ptr, ptr, ptr, ptr)
 
-declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32)
+declare void @__kmpc_dispatch_fini_4(ptr, i32)
 
-declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32)
+declare void @__kmpc_dispatch_fini_4u(ptr, i32)
 
-declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32)
+declare void @__kmpc_dispatch_fini_8(ptr, i32)
 
-declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32)
+declare void @__kmpc_dispatch_fini_8u(ptr, i32)
 
-declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*)
+declare void @__kmpc_omp_task_begin_if0(ptr, i32, ptr)
 
-declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*)
+declare void @__kmpc_omp_task_complete_if0(ptr, i32, ptr)
 
-declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*)
+declare i32 @__kmpc_omp_task_with_deps(ptr, i32, ptr, i32, ptr, i32, ptr)
 
-declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*)
+declare void @__kmpc_omp_wait_deps(ptr, i32, i32, ptr, i32, ptr)
 
-declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32)
+declare i32 @__kmpc_cancellationpoint(ptr, i32, i32)
 
-declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32)
+declare void @__kmpc_push_num_teams(ptr, i32, i32, i32)
 
-declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+declare void @__kmpc_fork_teams(ptr, i32, ptr, ...)
 
-declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*)
+declare void @__kmpc_taskloop(ptr, i32, ptr, i32, ptr, ptr, i64, i32, i32, i64, ptr)
 
-declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64)
+declare ptr @__kmpc_omp_target_task_alloc(ptr, i32, i32, i64, i64, ptr, i64)
 
-declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*)
+declare ptr @__kmpc_taskred_modifier_init(ptr, i32, i32, i32, ptr)
 
-declare i8* @__kmpc_taskred_init(i32, i32, i8*)
+declare ptr @__kmpc_taskred_init(i32, i32, ptr)
 
-declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32)
+declare void @__kmpc_task_reduction_modifier_fini(ptr, i32, i32)
 
-declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32)
+declare void @__kmpc_copyprivate(ptr, i32, i64, ptr, ptr, i32)
 
-declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***)
+declare ptr @__kmpc_threadprivate_cached(ptr, i32, ptr, i64, ptr)
 
-declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*)
+declare void @__kmpc_threadprivate_register(ptr, ptr, ptr, ptr, ptr)
 
-declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*)
+declare void @__kmpc_doacross_init(ptr, i32, i32, ptr)
 
-declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*)
+declare void @__kmpc_doacross_wait(ptr, i32, ptr)
 
-declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*)
+declare void @__kmpc_doacross_post(ptr, i32, ptr)
 
-declare void @__kmpc_doacross_fini(%struct.ident_t*, i32)
+declare void @__kmpc_doacross_fini(ptr, i32)
 
-declare i8* @__kmpc_alloc(i32, i64, i8*)
+declare ptr @__kmpc_alloc(i32, i64, ptr)
 
-declare void @__kmpc_free(i32, i8*, i8*)
+declare void @__kmpc_free(i32, ptr, ptr)
 
-declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*)
+declare ptr @__kmpc_init_allocator(i32, ptr, i32, ptr)
 
-declare void @__kmpc_destroy_allocator(i32, i8*)
+declare void @__kmpc_destroy_allocator(i32, ptr)
 
-declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
+declare void @__kmpc_push_target_tripcount_mapper(ptr, i64, i64)
 
 declare i64 @__kmpc_warp_active_thread_mask()
 
 declare void @__kmpc_syncwarp(i64)
 
-declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+declare i32 @__tgt_target_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
-declare i32 @__tgt_target_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i8*, i32, i8*)
+declare i32 @__tgt_target_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, i32, ptr)
 
-declare i32 @__tgt_target_teams_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32)
+declare i32 @__tgt_target_teams_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32)
 
-declare i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32, i32, i8*, i32, i8*)
+declare i32 @__tgt_target_teams_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, ptr, i32, ptr)
 
 declare void @__tgt_register_requires(i64)
 
-declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
-declare void @__tgt_target_data_begin_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+declare void @__tgt_target_data_begin_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
-declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
-declare void @__tgt_target_data_end_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+declare void @__tgt_target_data_end_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
-declare void @__tgt_target_data_update_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+declare void @__tgt_target_data_update_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
-declare void @__tgt_target_data_update_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+declare void @__tgt_target_data_update_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
-declare i64 @__tgt_mapper_num_components(i8*)
+declare i64 @__tgt_mapper_num_components(ptr)
 
-declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64, i8*)
+declare void @__tgt_push_mapper_component(ptr, ptr, ptr, i64, i64, ptr)
 
-declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*)
+declare ptr @__kmpc_task_allow_completion_event(ptr, i32, ptr)
 
-declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*)
+declare ptr @__kmpc_task_reduction_get_th_data(i32, ptr, ptr)
 
-declare i8* @__kmpc_task_reduction_init(i32, i32, i8*)
+declare ptr @__kmpc_task_reduction_init(i32, i32, ptr)
 
-declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*)
+declare ptr @__kmpc_task_reduction_modifier_init(ptr, i32, i32, i32, ptr)
 
-declare void @__kmpc_proxy_task_completed_ooo(i8*)
+declare void @__kmpc_proxy_task_completed_ooo(ptr)
 
 ; Function Attrs: noinline cold
-declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32) #0
+declare void @__kmpc_barrier_simple_spmd(ptr nocapture nofree readonly, i32) #0
 
 attributes #0 = { noinline cold }
 
@@ -730,46 +730,46 @@ attributes #0 = { noinline cold }
 ; CHECK-NEXT: declare dso_local i32 @omp_get_max_active_levels()
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly)
+; CHECK-NEXT: declare dso_local void @omp_get_schedule(ptr nocapture writeonly, ptr nocapture writeonly)
 
 ; CHECK-NOT: Function Attrs
 ; CHECK: declare dso_local i32 @omp_get_max_task_priority()
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_init_lock(%struct.omp_lock_t*)
+; CHECK: declare dso_local void @omp_init_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_set_lock(%struct.omp_lock_t*)
+; CHECK: declare dso_local void @omp_set_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_unset_lock(%struct.omp_lock_t*)
+; CHECK: declare dso_local void @omp_unset_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_destroy_lock(%struct.omp_lock_t*)
+; CHECK: declare dso_local void @omp_destroy_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i32 @omp_test_lock(%struct.omp_lock_t*)
+; CHECK: declare dso_local i32 @omp_test_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_init_nest_lock(%struct.omp_nest_lock_t*)
+; CHECK: declare dso_local void @omp_init_nest_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_set_nest_lock(%struct.omp_nest_lock_t*)
+; CHECK: declare dso_local void @omp_set_nest_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_unset_nest_lock(%struct.omp_nest_lock_t*)
+; CHECK: declare dso_local void @omp_unset_nest_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_destroy_nest_lock(%struct.omp_nest_lock_t*)
+; CHECK: declare dso_local void @omp_destroy_nest_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i32 @omp_test_nest_lock(%struct.omp_nest_lock_t*)
+; CHECK: declare dso_local i32 @omp_test_nest_lock(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_init_lock_with_hint(%struct.omp_lock_t*, i32)
+; CHECK: declare dso_local void @omp_init_lock_with_hint(ptr, i32)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32)
+; CHECK: declare dso_local void @omp_init_nest_lock_with_hint(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
 ; CHECK-NEXT: declare dso_local double @omp_get_wtime()
@@ -805,25 +805,25 @@ attributes #0 = { noinline cold }
 ; CHECK: declare dso_local i32 @omp_get_initial_device()
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i8* @omp_target_alloc(i64, i32)
+; CHECK: declare dso_local ptr @omp_target_alloc(i64, i32)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @use_voidptr(i8*)
+; CHECK: declare dso_local void @use_voidptr(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_target_free(i8*, i32)
+; CHECK: declare dso_local void @omp_target_free(ptr, i32)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i32 @omp_target_is_present(i8*, i32)
+; CHECK: declare dso_local i32 @omp_target_is_present(ptr, i32)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i32 @omp_target_memcpy(i8*, i8*, i64, i64, i64, i32, i32)
+; CHECK: declare dso_local i32 @omp_target_memcpy(ptr, ptr, i64, i64, i64, i32, i32)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i32 @omp_target_associate_ptr(i8*, i8*, i64, i64, i32)
+; CHECK: declare dso_local i32 @omp_target_associate_ptr(ptr, ptr, i64, i64, i32)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i32 @omp_target_disassociate_ptr(i8*, i32)
+; CHECK: declare dso_local i32 @omp_target_disassociate_ptr(ptr, i32)
 
 ; CHECK-NOT: Function Attrs
 ; CHECK: declare dso_local i32 @omp_get_device_num()
@@ -838,7 +838,7 @@ attributes #0 = { noinline cold }
 ; CHECK: declare dso_local i32 @omp_get_place_num_procs(i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly)
+; CHECK-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, ptr nocapture writeonly)
 
 ; CHECK: ; Function Attrs: nounwind
 ; CHECK-NEXT: declare dso_local i32 @omp_get_place_num()
@@ -847,10 +847,10 @@ attributes #0 = { noinline cold }
 ; CHECK-NEXT: declare dso_local i32 @omp_get_partition_num_places()
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*)
+; CHECK-NEXT: declare dso_local void @omp_get_partition_place_nums(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i32 @omp_control_tool(i32, i32, i8*)
+; CHECK: declare dso_local i32 @omp_control_tool(i32, i32, ptr)
 
 ; CHECK-NOT: Function Attrs
 ; CHECK: declare dso_local void @omp_destroy_allocator(i64)
@@ -862,25 +862,25 @@ attributes #0 = { noinline cold }
 ; CHECK: declare dso_local i64 @omp_get_default_allocator()
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i8* @omp_alloc(i64, i64)
+; CHECK: declare dso_local ptr @omp_alloc(i64, i64)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @omp_free(i8*, i64)
+; CHECK: declare dso_local void @omp_free(ptr, i64)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @ompc_set_affinity_format(i8*)
+; CHECK: declare dso_local void @ompc_set_affinity_format(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i64 @ompc_get_affinity_format(i8*, i64)
+; CHECK: declare dso_local i64 @ompc_get_affinity_format(ptr, i64)
 
 ; CHECK-NOT: Function Attrs
 ; CHECK: declare dso_local void @use_sizet(i64)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local void @ompc_display_affinity(i8*)
+; CHECK: declare dso_local void @ompc_display_affinity(ptr)
 
 ; CHECK-NOT: Function Attrs
-; CHECK: declare dso_local i64 @ompc_capture_affinity(i8*, i64, i8*)
+; CHECK: declare dso_local i64 @ompc_capture_affinity(ptr, i64, ptr)
 
 ; CHECK-NOT: Function Attrs
 ; CHECK: declare dso_local void @omp_fulfill_event(i64)
@@ -895,256 +895,256 @@ attributes #0 = { noinline cold }
 ; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels()
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_barrier(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare i32 @__kmpc_cancel(ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32)
+; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(ptr, i32)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*)
+; CHECK-NEXT: declare void @__kmpc_flush(ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+; CHECK-NEXT: declare void @__kmpc_fork_call(ptr, i32, ptr, ...)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32)
+; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_push_num_threads(ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_push_proc_bind(ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_serialized_parallel(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32)
+; CHECK-NEXT: declare i32 @__kmpc_master(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_end_master(ptr, i32)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*)
+; CHECK-NEXT: declare void @__kmpc_critical(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32)
+; CHECK-NEXT: declare void @__kmpc_critical_with_hint(ptr, i32, ptr, i32)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*)
+; CHECK-NEXT: declare void @__kmpc_end_critical(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_begin(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*)
+; CHECK-NEXT: declare void @__kmpc_end(ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
+; CHECK-NEXT: declare i32 @__kmpc_reduce(ptr, i32, i32, i64, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
+; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(ptr, i32, i32, i64, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*)
+; CHECK-NEXT: declare void @__kmpc_end_reduce(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*)
+; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_ordered(ptr, i32)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_end_ordered(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_for_static_init_4(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_for_static_init_8(ptr, i32, i32, ptr, ptr, ptr, ptr, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(ptr, i32, i32, ptr, ptr, ptr, ptr, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_for_static_fini(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_team_static_init_4(ptr, i32, ptr, ptr, ptr, ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(ptr, i32, ptr, ptr, ptr, ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_team_static_init_8(ptr, i32, ptr, ptr, ptr, ptr, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(ptr, i32, ptr, ptr, ptr, ptr, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i64, i64)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32)
+; CHECK-NEXT: declare i32 @__kmpc_single(ptr, i32)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_end_single(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*)
+; CHECK-NEXT: declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*)
+; CHECK-NEXT: declare i32 @__kmpc_omp_task(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_end_taskgroup(ptr, i32)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_taskgroup(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(ptr, i32, i32, ptr, i32, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(ptr, i32, i32, ptr, i32, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(ptr, i32, i32, ptr, i64, i64, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(ptr, i32, i32, ptr, i64, i64, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(ptr, i32, i32, i32, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(ptr, i32, i32, i32, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(ptr, i32, i32, i64, i64, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(ptr, i32, i32, i64, i64, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*)
+; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(ptr, i32, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*)
+; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(ptr, i32, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*)
+; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(ptr, i32, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*)
+; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(ptr, i32, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*)
+; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*)
+; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*)
+; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(ptr, i32, ptr, i32, ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*)
+; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(ptr, i32, i32, ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_push_num_teams(ptr, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+; CHECK-NEXT: declare void @__kmpc_fork_teams(ptr, i32, ptr, ...)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*)
+; CHECK-NEXT: declare void @__kmpc_taskloop(ptr, i32, ptr, i32, ptr, ptr, i64, i32, i32, i64, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64)
+; CHECK-NEXT: declare ptr @__kmpc_omp_target_task_alloc(ptr, i32, i32, i64, i64, ptr, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*)
+; CHECK-NEXT: declare ptr @__kmpc_taskred_modifier_init(ptr, i32, i32, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*)
+; CHECK-NEXT: declare ptr @__kmpc_taskred_init(i32, i32, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32)
+; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32)
+; CHECK-NEXT: declare void @__kmpc_copyprivate(ptr, i32, i64, ptr, ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***)
+; CHECK-NEXT: declare ptr @__kmpc_threadprivate_cached(ptr, i32, ptr, i64, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*)
+; CHECK-NEXT: declare void @__kmpc_threadprivate_register(ptr, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*)
+; CHECK-NEXT: declare void @__kmpc_doacross_init(ptr, i32, i32, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*)
+; CHECK-NEXT: declare void @__kmpc_doacross_wait(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*)
+; CHECK-NEXT: declare void @__kmpc_doacross_post(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: convergent nounwind
-; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32)
+; CHECK-NEXT: declare void @__kmpc_doacross_fini(ptr, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*)
+; CHECK-NEXT: declare ptr @__kmpc_alloc(i32, i64, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*)
+; CHECK-NEXT: declare void @__kmpc_free(i32, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*)
+; CHECK-NEXT: declare ptr @__kmpc_init_allocator(i32, ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*)
+; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(ptr, i64, i64)
 
 ; CHECK: ; Function Attrs: convergent nounwind
 ; CHECK-NEXT: declare i64 @__kmpc_warp_active_thread_mask()
@@ -1153,61 +1153,61 @@ attributes #0 = { noinline cold }
 ; CHECK-NEXT: declare void @__kmpc_syncwarp(i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; CHECK-NEXT: declare i32 @__tgt_target_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i8*, i32, i8*)
+; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32)
+; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32, i32, i8*, i32, i8*)
+; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
 ; CHECK-NEXT: declare void @__tgt_register_requires(i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*)
+; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64, i8*)
+; CHECK-NEXT: declare void @__tgt_push_mapper_component(ptr, ptr, ptr, i64, i64, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*)
+; CHECK-NEXT: declare ptr @__kmpc_task_allow_completion_event(ptr, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*)
+; CHECK-NEXT: declare ptr @__kmpc_task_reduction_get_th_data(i32, ptr, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*)
+; CHECK-NEXT: declare ptr @__kmpc_task_reduction_init(i32, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*)
+; CHECK-NEXT: declare ptr @__kmpc_task_reduction_modifier_init(ptr, i32, i32, i32, ptr)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*)
+; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(ptr)
 
 ; CHECK: ; Function Attrs: cold convergent noinline nounwind
-; CHECK-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32)
+; CHECK-NEXT: declare void @__kmpc_barrier_simple_spmd(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write)
 ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32)
@@ -1270,46 +1270,46 @@ attributes #0 = { noinline cold }
 ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels()
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly)
+; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(ptr nocapture writeonly, ptr nocapture writeonly)
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local i32 @omp_get_max_task_priority()
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_init_lock(%struct.omp_lock_t*)
+; OPTIMISTIC: declare dso_local void @omp_init_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_set_lock(%struct.omp_lock_t*)
+; OPTIMISTIC: declare dso_local void @omp_set_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_unset_lock(%struct.omp_lock_t*)
+; OPTIMISTIC: declare dso_local void @omp_unset_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_destroy_lock(%struct.omp_lock_t*)
+; OPTIMISTIC: declare dso_local void @omp_destroy_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i32 @omp_test_lock(%struct.omp_lock_t*)
+; OPTIMISTIC: declare dso_local i32 @omp_test_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_init_nest_lock(%struct.omp_nest_lock_t*)
+; OPTIMISTIC: declare dso_local void @omp_init_nest_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_set_nest_lock(%struct.omp_nest_lock_t*)
+; OPTIMISTIC: declare dso_local void @omp_set_nest_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_unset_nest_lock(%struct.omp_nest_lock_t*)
+; OPTIMISTIC: declare dso_local void @omp_unset_nest_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_destroy_nest_lock(%struct.omp_nest_lock_t*)
+; OPTIMISTIC: declare dso_local void @omp_destroy_nest_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i32 @omp_test_nest_lock(%struct.omp_nest_lock_t*)
+; OPTIMISTIC: declare dso_local i32 @omp_test_nest_lock(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_init_lock_with_hint(%struct.omp_lock_t*, i32)
+; OPTIMISTIC: declare dso_local void @omp_init_lock_with_hint(ptr, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32)
+; OPTIMISTIC: declare dso_local void @omp_init_nest_lock_with_hint(ptr, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
 ; OPTIMISTIC-NEXT: declare dso_local double @omp_get_wtime()
@@ -1345,25 +1345,25 @@ attributes #0 = { noinline cold }
 ; OPTIMISTIC: declare dso_local i32 @omp_get_initial_device()
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i8* @omp_target_alloc(i64, i32)
+; OPTIMISTIC: declare dso_local ptr @omp_target_alloc(i64, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @use_voidptr(i8*)
+; OPTIMISTIC: declare dso_local void @use_voidptr(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_target_free(i8*, i32)
+; OPTIMISTIC: declare dso_local void @omp_target_free(ptr, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i32 @omp_target_is_present(i8*, i32)
+; OPTIMISTIC: declare dso_local i32 @omp_target_is_present(ptr, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i32 @omp_target_memcpy(i8*, i8*, i64, i64, i64, i32, i32)
+; OPTIMISTIC: declare dso_local i32 @omp_target_memcpy(ptr, ptr, i64, i64, i64, i32, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i32 @omp_target_associate_ptr(i8*, i8*, i64, i64, i32)
+; OPTIMISTIC: declare dso_local i32 @omp_target_associate_ptr(ptr, ptr, i64, i64, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i32 @omp_target_disassociate_ptr(i8*, i32)
+; OPTIMISTIC: declare dso_local i32 @omp_target_disassociate_ptr(ptr, i32)
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local i32 @omp_get_device_num()
@@ -1378,7 +1378,7 @@ attributes #0 = { noinline cold }
 ; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly)
+; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, ptr nocapture writeonly)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read)
 ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num()
@@ -1387,10 +1387,10 @@ attributes #0 = { noinline cold }
 ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places()
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read)
-; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*)
+; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, i8*)
+; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local void @omp_destroy_allocator(i64)
@@ -1402,25 +1402,25 @@ attributes #0 = { noinline cold }
 ; OPTIMISTIC: declare dso_local i64 @omp_get_default_allocator()
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i8* @omp_alloc(i64, i64)
+; OPTIMISTIC: declare dso_local ptr @omp_alloc(i64, i64)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @omp_free(i8*, i64)
+; OPTIMISTIC: declare dso_local void @omp_free(ptr, i64)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @ompc_set_affinity_format(i8*)
+; OPTIMISTIC: declare dso_local void @ompc_set_affinity_format(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i64 @ompc_get_affinity_format(i8*, i64)
+; OPTIMISTIC: declare dso_local i64 @ompc_get_affinity_format(ptr, i64)
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local void @use_sizet(i64)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local void @ompc_display_affinity(i8*)
+; OPTIMISTIC: declare dso_local void @ompc_display_affinity(ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare dso_local i64 @ompc_capture_affinity(i8*, i64, i8*)
+; OPTIMISTIC: declare dso_local i64 @ompc_capture_affinity(ptr, i64, ptr)
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local void @omp_fulfill_event(i64)
@@ -1435,244 +1435,244 @@ attributes #0 = { noinline cold }
 ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels()
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read)
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(ptr nocapture nofree readonly)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...)
+; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly, ...)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t* nocapture nofree readonly, i32, i32)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskyield(ptr nocapture nofree readonly, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t* nocapture nofree readonly, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_threads(ptr nocapture nofree readonly, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t* nocapture nofree readonly, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_push_proc_bind(ptr nocapture nofree readonly, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_serialized_parallel(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_end_serialized_parallel(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_master(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_master(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_critical(ptr nocapture nofree readonly, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(ptr nocapture nofree readonly, i32, ptr, i32)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(ptr nocapture nofree readonly, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_begin(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_begin(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly)
+; OPTIMISTIC-NEXT: declare void @__kmpc_end(ptr nocapture nofree readonly)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(ptr nocapture nofree readonly, i32, i32, i64, ptr nocapture nofree readonly, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(ptr nocapture nofree readonly, i32, i32, i64, ptr nocapture nofree readonly, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(ptr nocapture nofree readonly, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(ptr nocapture nofree readonly, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_fini(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4u(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8u(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_task_alloc(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_omp_task_alloc(ptr nocapture nofree readonly, i32, i32, i64, i64, ptr nocapture nofree readonly)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(ptr nocapture nofree readonly, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, i32, i32, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, i32, i32, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, i64, i64, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8u(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree, i64, i64, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4(ptr nocapture nofree readonly, i32, i32, i32, i32, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4u(ptr nocapture nofree readonly, i32, i32, i32, i32, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8(ptr nocapture nofree readonly, i32, i32, i64, i64, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8u(ptr nocapture nofree readonly, i32, i32, i64, i64, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4u(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8u(ptr nocapture nofree readonly, i32, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree, ptr nocapture nofree)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4u(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8u(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_begin_if0(ptr nocapture nofree readonly, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_complete_if0(ptr nocapture nofree readonly, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(ptr nocapture nofree readonly, i32, ptr, i32, ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(ptr nocapture nofree readonly, i32, i32, ptr nocapture nofree readonly, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t* nocapture nofree readonly, i32, i32)
+; OPTIMISTIC-NEXT: declare i32 @__kmpc_cancellationpoint(ptr nocapture nofree readonly, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite)
-; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t* nocapture nofree readonly, i32, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_teams(ptr nocapture nofree readonly, i32, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...)
+; OPTIMISTIC-NEXT: declare void @__kmpc_fork_teams(ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly, ...)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_taskloop(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i64* nocapture nofree, i64* nocapture nofree, i64, i32, i32, i64, i8*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_taskloop(ptr nocapture nofree readonly, i32, ptr, i32, ptr nocapture nofree, ptr nocapture nofree, i64, i32, i32, i64, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly, i64)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_omp_target_task_alloc(ptr nocapture nofree readonly, i32, i32, i64, i64, ptr nocapture nofree readonly, i64)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_taskred_modifier_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i8*)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_taskred_modifier_init(ptr nocapture nofree readonly, i32, i32, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*)
+; OPTIMISTIC-NEXT: declare ptr @__kmpc_taskred_init(i32, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(ptr nocapture nofree readonly, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t* nocapture nofree readonly, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_copyprivate(ptr nocapture nofree readonly, i32, i64, ptr nocapture nofree readonly, ptr, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_threadprivate_cached(%struct.ident_t* nocapture nofree readonly, i32, i8*, i64, i8***)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_threadprivate_cached(ptr nocapture nofree readonly, i32, ptr, i64, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly)
+; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(ptr nocapture nofree readonly, ptr, ptr nocapture nofree readonly, ptr nocapture nofree readonly, ptr nocapture nofree readonly)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(ptr nocapture nofree readonly, i32, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly)
+; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly)
+; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(ptr nocapture nofree readonly, i32, ptr nocapture nofree readonly)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(ptr nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_alloc(i32, i64, i8*)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_free(i32, i8*, i8*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_free(i32, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_init_allocator(i32, i8*, i32, i8*)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_init_allocator(i32, ptr, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write)
-; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(ptr, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare i64 @__kmpc_warp_active_thread_mask()
@@ -1681,61 +1681,61 @@ attributes #0 = { noinline cold }
 ; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare i32 @__tgt_target_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i8*, i32, i8*)
+; OPTIMISTIC-NEXT: declare i32 @__tgt_target_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32)
+; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32, i32, i8*, i32, i8*)
+; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(ptr, i64, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, ptr, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
 ; OPTIMISTIC-NEXT: declare void @__tgt_register_requires(i64)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_nowait_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_nowait_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare i64 @__tgt_mapper_num_components(i8*)
+; OPTIMISTIC-NEXT: declare i64 @__tgt_mapper_num_components(ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nounwind
-; OPTIMISTIC-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64, i8*)
+; OPTIMISTIC-NEXT: declare void @__tgt_push_mapper_component(ptr, ptr, ptr, i64, i64, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_allow_completion_event(%struct.ident_t* nocapture nofree readonly, i32, i8*)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_task_allow_completion_event(ptr nocapture nofree readonly, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_task_reduction_get_th_data(i32, ptr, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_init(i32, i32, i8*)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_task_reduction_init(i32, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*)
+; OPTIMISTIC-NEXT: declare noalias ptr @__kmpc_task_reduction_modifier_init(ptr, i32, i32, i32, ptr)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*)
+; OPTIMISTIC-NEXT: declare void @__kmpc_proxy_task_completed_ooo(ptr)
 
 ; OPTIMISTIC: ; Function Attrs: cold convergent noinline nounwind
-; OPTIMISTIC-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32)
+; OPTIMISTIC-NEXT: declare void @__kmpc_barrier_simple_spmd(ptr nocapture nofree readonly, i32)
 
 !llvm.module.flags = !{!0}
 
diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll
index e7b81a9d8f176e..cc4a52847dffa5 100644
--- a/llvm/test/Transforms/OpenMP/always_inline_device.ll
+++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes
 ; RUN: opt < %s -S -passes=openmp-opt -openmp-opt-inline-device | FileCheck %s
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode = weak constant i8 1
-@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata"
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata"
 @G = external global i8
 
 ; Function Attrs: convergent norecurse nounwind
@@ -13,7 +13,7 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
 ; CHECK: Function Attrs: convergent norecurse nounwind
 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_c0934fc2_foo_l4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
 ; CHECK-NEXT:    [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
 ; CHECK-NEXT:    [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
 ; CHECK-NEXT:    br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@@ -23,14 +23,14 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK:       user_code.entry:
-; CHECK-NEXT:    store i8 1, i8* @G, align 1
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; CHECK-NEXT:    store i8 1, ptr @G, align 1
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; CHECK-NEXT:    ret void
 ; CHECK:       worker.exit:
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
@@ -40,9 +40,9 @@ user_code.entry:                                  ; preds = %entry
   ; TODO: This is not perfect. We should rather go for SPMD mode and tell the runtime
   ;       to only spawn a single thread. Further, we then should not guard any code.
   %isSPMD = call i8 @__kmpc_is_spmd_exec_mode()
-  store i8 %isSPMD, i8* @G
+  store i8 %isSPMD, ptr @G
   call void @bar() #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
@@ -51,9 +51,9 @@ worker.exit:                                      ; preds = %entry
 
 declare i8 @__kmpc_is_spmd_exec_mode()
 
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)
+declare i32 @__kmpc_target_init(ptr, i8, i1)
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 ; Function Attrs: convergent nounwind
 define hidden void @bar() #1 {
@@ -76,7 +76,7 @@ attributes #2 = { convergent }
 !llvm.ident = !{!7}
 
 !0 = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0}
-!1 = !{void ()* @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
+!1 = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
 !2 = !{i32 1, !"wchar_size", i32 4}
 !3 = !{i32 7, !"openmp", i32 50}
 !4 = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll
index 86772ae8bd54db..9c0416af359d4d 100644
--- a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll
+++ b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll
@@ -13,10 +13,10 @@ define linkonce_odr hidden i8 @_ZStplIdESt7complexIT_ERKS2_S4_() local_unnamed_a
   ret i8 undef
 }
 
-declare void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, float**, float**, i64, float**, float*, float*, i64) local_unnamed_addr
+declare void @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148(i64, i64, i64, ptr, ptr, i64, ptr, ptr, ptr, i64) local_unnamed_addr
 
 declare dso_local fastcc void @__kmpc_for_static_init_8u() unnamed_addr
 
 !nvvm.annotations = !{!0}
 
-!0 = !{void (i64, i64, i64, float**, float**, i64, float**, float*, float*, i64)* @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148, !"kernel", i32 1}
+!0 = !{ptr @__omp_offloading_2b_4010cad__ZN11qmcplusplus7ompBLAS17gemv_batched_implIfEEiRiciiPKT_PKS5_iS7_iS5_PKPS3_ii_l148, !"kernel", i32 1}
diff --git a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll
index 05dd79410cbc1f..e684f65cf2e0c0 100644
--- a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll
+++ b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll
@@ -4,13 +4,13 @@
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
 
-%"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, i8* }
+%"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, ptr }
 %"struct.(anonymous namespace)::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 }
-%"struct.(anonymous namespace)::ThreadStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", %"struct.(anonymous namespace)::ThreadStateTy"* }
+%"struct.(anonymous namespace)::ThreadStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", ptr }
 
 @_ZN12_GLOBAL__N_19TeamStateE = internal addrspace(3) global %"struct.(anonymous namespace)::TeamStateTy" undef, align 8
 
-define internal i8* @__kmpc_alloc_shared() {
+define internal ptr @__kmpc_alloc_shared() {
 lor.lhs.false.i.i:
   br i1 false, label %_ZN4ompx5state8lookup32ENS0_9ValueKindEb.exit.i, label %if.then.i44.i.i
 
@@ -18,11 +18,10 @@ if.then.i44.i.i:                                  ; preds = %lor.lhs.false.i.i
   br label %_ZN4ompx5state8lookup32ENS0_9ValueKindEb.exit.i
 
 _ZN4ompx5state8lookup32ENS0_9ValueKindEb.exit.i:  ; preds = %if.then.i44.i.i, %lor.lhs.false.i.i
-  %.pn.i45.i.i = phi i8* [ null, %if.then.i44.i.i ], [ addrspacecast (i8 addrspace(3)* bitcast (%"struct.(anonymous namespace)::TeamStateTy" addrspace(3)* @_ZN12_GLOBAL__N_19TeamStateE to i8 addrspace(3)*) to i8*), %lor.lhs.false.i.i ]
-  %retval.0.in.i.i.i = getelementptr inbounds i8, i8* %.pn.i45.i.i, i64 4
-  %retval.0.i46.i.i = bitcast i8* %retval.0.in.i.i.i to i32*
-  %0 = load i32, i32* %retval.0.i46.i.i, align 4
-  ret i8* null
+  %.pn.i45.i.i = phi ptr [ null, %if.then.i44.i.i ], [ addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE to ptr), %lor.lhs.false.i.i ]
+  %retval.0.in.i.i.i = getelementptr inbounds i8, ptr %.pn.i45.i.i, i64 4
+  %0 = load i32, ptr %retval.0.in.i.i.i, align 4
+  ret ptr null
 }
 
 !llvm.module.flags = !{!0, !1}
diff --git a/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll b/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll
index efc21c5169f956..fa3fd730eb495c 100644
--- a/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll
+++ b/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll
@@ -4,49 +4,49 @@
 ;                                                           A              |
 ;                                                           \-------------/
 
-%"struct.TS" = type { i32, %"struct.TS"* }
+%"struct.TS" = type { i32, ptr }
 
 define weak amdgpu_kernel void @k() {
 ; CHECK-LABEL: define {{[^@]+}}@k() {
 ; CHECK-NEXT:  BB1:
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       BB2:
-; CHECK-NEXT:    [[DOTPRE158_I:%.*]] = phi %struct.TS* [ null, [[BB1:%.*]] ], [ [[PRE2:%.*]], [[BB6:%.*]] ]
+; CHECK-NEXT:    [[DOTPRE158_I:%.*]] = phi ptr [ null, [[BB1:%.*]] ], [ [[PRE2:%.*]], [[BB6:%.*]] ]
 ; CHECK-NEXT:    br i1 false, label [[BB4:%.*]], label [[BB3:%.*]]
 ; CHECK:       BB3:
 ; CHECK-NEXT:    br label [[BB4]]
 ; CHECK:       BB4:
-; CHECK-NEXT:    [[PRE1:%.*]] = phi %struct.TS* [ [[DOTPRE158_I]], [[BB3]] ], [ null, [[BB2]] ]
+; CHECK-NEXT:    [[PRE1:%.*]] = phi ptr [ [[DOTPRE158_I]], [[BB3]] ], [ null, [[BB2]] ]
 ; CHECK-NEXT:    br i1 false, label [[BB6]], label [[BB5:%.*]]
 ; CHECK:       BB5:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_TS:%.*]], %struct.TS* [[PRE1]], i64 0, i32 1
-; CHECK-NEXT:    [[Q3:%.*]] = load %struct.TS*, %struct.TS** [[GEP]], align 8
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_TS:%.*]], ptr [[PRE1]], i64 0, i32 1
+; CHECK-NEXT:    [[Q3:%.*]] = load ptr, ptr [[GEP]], align 8
 ; CHECK-NEXT:    br label [[BB6]]
 ; CHECK:       BB6:
-; CHECK-NEXT:    [[PRE2]] = phi %struct.TS* [ null, [[BB4]] ], [ [[Q3]], [[BB5]] ]
+; CHECK-NEXT:    [[PRE2]] = phi ptr [ null, [[BB4]] ], [ [[Q3]], [[BB5]] ]
 ; CHECK-NEXT:    br label [[BB2]]
 ;
 BB1:
   br label %BB2
 
 BB2:
-  %.pre158.i = phi %"struct.TS"* [ null, %BB1 ], [ %pre2, %BB6 ]
+  %.pre158.i = phi ptr [ null, %BB1 ], [ %pre2, %BB6 ]
   br i1 false, label %BB4, label %BB3
 
 BB3:
   br label %BB4
 
 BB4:
-  %pre1 = phi %"struct.TS"* [ %.pre158.i, %BB3 ], [ null, %BB2 ]
+  %pre1 = phi ptr [ %.pre158.i, %BB3 ], [ null, %BB2 ]
   br i1 false, label %BB6, label %BB5
 
 BB5:
-  %gep = getelementptr inbounds %"struct.TS", %"struct.TS"* %pre1, i64 0, i32 1
-  %q3 = load %"struct.TS"*, %"struct.TS"** %gep, align 8
+  %gep = getelementptr inbounds %"struct.TS", ptr %pre1, i64 0, i32 1
+  %q3 = load ptr, ptr %gep, align 8
   br label %BB6
 
 BB6:
-  %pre2 = phi %"struct.TS"* [ null, %BB4 ], [ %q3, %BB5 ]
+  %pre2 = phi ptr [ null, %BB4 ], [ %q3, %BB5 ]
   br label %BB2
 }
 
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index d7f92d872de32b..29387947a4f5cb 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -119,10 +119,10 @@
 ;;   { weak_callee_empty(); }
 ;; }
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1
 @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1
 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1
@@ -131,25 +131,25 @@
 @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1
 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1
 @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1
-@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
 @G = external global i32, align 4
-@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
-@llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, i8* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata"
+@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8
+@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata"
 
 define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 {
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
@@ -157,16 +157,16 @@ worker.exit:                                      ; preds = %entry
 }
 
 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
   ret i32 0
 }
 
-define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @no_parallel_region_in_here() #7
   call void @unknown_no_openmp() #8
   ret void
@@ -174,72 +174,70 @@ entry:
 
 define hidden void @no_parallel_region_in_here() #1 {
 entry:
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = call i32 @__kmpc_single(%struct.ident_t* @2, i32 %0)
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  %1 = call i32 @__kmpc_single(ptr @2, i32 %0)
   %2 = icmp ne i32 %1, 0
   br i1 %2, label %omp_if.then, label %omp_if.end
 
 omp_if.then:                                      ; preds = %entry
-  store i32 0, i32* @G, align 4
-  call void @__kmpc_end_single(%struct.ident_t* @2, i32 %0)
+  store i32 0, ptr @G, align 4
+  call void @__kmpc_end_single(ptr @2, i32 %0)
   br label %omp_if.end
 
 omp_if.end:                                       ; preds = %omp_if.then, %entry
-  call void @__kmpc_barrier(%struct.ident_t* @3, i32 %0)
+  call void @__kmpc_barrier(ptr @3, i32 %0)
   ret void
 }
 
 declare void @unknown_no_openmp() #2
 
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
+declare i32 @__kmpc_global_thread_num(ptr) #3
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 {
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %captured_vars_addrs1 = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @unknown_no_openmp() #8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %2, i64 0)
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs, i64 0)
   call void @no_parallel_region_in_here() #7
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %3, i64 0)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -251,25 +249,25 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
-declare void @__kmpc_get_shared_variables(i8***)
+declare void @__kmpc_get_shared_variables(ptr)
 
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
 
-define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -281,12 +279,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -294,55 +292,53 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedur
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @unknown_no_openmp() #8
   call void @simple_state_machine_interprocedural_before() #7
   call void @no_parallel_region_in_here() #7
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 0)
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 0)
   call void @simple_state_machine_interprocedural_after() #7
   ret void
 }
 
 define hidden void @simple_state_machine_interprocedural_before() #1 {
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** %1, i64 0)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -352,21 +348,20 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
 define hidden void @simple_state_machine_interprocedural_after() #1 {
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** %1, i64 0)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0)
   ret void
 }
 
@@ -374,46 +369,44 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 0)
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %captured_vars_addrs1 = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 0)
   %call = call i32 @unknown() #7
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** %3, i64 0)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -423,23 +416,23 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
 declare i32 @unknown() #4
 
-define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -449,12 +442,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__8(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__8(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -462,46 +455,44 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_att
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** %2, i64 0)
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %captured_vars_addrs1 = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs, i64 0)
   call void @unknown_no_openmp() #8
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** %3, i64 0)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__10(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -511,21 +502,21 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__10(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__10(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
-define internal void @__omp_outlined__11(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -535,12 +526,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__11(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__11(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -548,47 +539,45 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77() #0
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__12(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %captured_vars_addrs1 = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @unknown_no_openmp() #8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** %2, i64 0)
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs, i64 0)
   call void @unknown_pure() #9
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** %3, i64 0)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__13(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -598,23 +587,23 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__13(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__13(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
 declare void @unknown_pure() #5
 
-define internal void @__omp_outlined__14(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -624,12 +613,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__14(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__14(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -637,29 +626,29 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedur
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__15(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %call = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #7
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+  %call = call i32 @omp_get_thread_num() #7
   call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7
   ret void
 }
@@ -667,8 +656,8 @@ entry:
 define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  %0 = load i32, i32* %a.addr, align 4
+  store i32 %a, ptr %a.addr, align 4
+  %0 = load i32, ptr %a.addr, align 4
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -676,7 +665,7 @@ if.then:                                          ; preds = %entry
   br label %return
 
 if.end:                                           ; preds = %entry
-  %1 = load i32, i32* %a.addr, align 4
+  %1 = load i32, ptr %a.addr, align 4
   %sub = sub nsw i32 %1, 1
   call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7
   call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7
@@ -692,28 +681,28 @@ define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__16(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @weak_callee_empty() #7
   ret void
 }
@@ -723,18 +712,18 @@ entry:
   ret void
 }
 
-declare i32 @__kmpc_single(%struct.ident_t*, i32) #6
+declare i32 @__kmpc_single(ptr, i32) #6
 
-declare void @__kmpc_end_single(%struct.ident_t*, i32) #6
+declare void @__kmpc_end_single(ptr, i32) #6
 
-declare void @__kmpc_barrier(%struct.ident_t*, i32) #6
+declare void @__kmpc_barrier(ptr, i32) #6
 
-define internal void @__omp_outlined__17(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -744,21 +733,21 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__17(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__17(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
-define internal void @__omp_outlined__18(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -768,30 +757,29 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__18(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__18(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
 define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 {
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** %1, i64 0)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__19(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -801,12 +789,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__19(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__19(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -833,14 +821,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 !5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
 !6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
 !7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-!8 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-!9 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-!10 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-!11 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-!12 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-!13 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-!14 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-!15 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
+!8 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
+!9 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
+!10 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
+!11 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
+!12 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
+!13 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
+!14 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
+!15 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
 !16 = !{i32 1, !"wchar_size", i32 4}
 !17 = !{i32 7, !"openmp", i32 50}
 !18 = !{i32 7, !"openmp-device", i32 50}
@@ -850,29 +838,29 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; AMDGPU-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; AMDGPU-NEXT:    ret i32 0
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
 ; AMDGPU-NEXT:    ret void
@@ -882,16 +870,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
 ; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; AMDGPU-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; AMDGPU:       omp_if.then:
-; AMDGPU-NEXT:    store i32 0, i32* @G, align 4
-; AMDGPU-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 0, ptr @G, align 4
+; AMDGPU-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    br label [[OMP_IF_END]]
 ; AMDGPU:       omp_if.end:
-; AMDGPU-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -899,16 +887,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; AMDGPU-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; AMDGPU:       omp_if.then:
-; AMDGPU-NEXT:    store i32 0, i32* @G, align 4
-; AMDGPU-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-NEXT:    store i32 0, ptr @G, align 4
+; AMDGPU-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[OMP_IF_END]]
 ; AMDGPU:       omp_if.end:
-; AMDGPU-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -916,10 +904,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
 ; AMDGPU-SAME: () #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU:       is_worker_check:
@@ -929,19 +917,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU:       worker_state_machine.begin:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU:       worker_state_machine.finished:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker_state_machine.is_active.check:
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.check:
-; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*)
+; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__2_wrapper.ID
 ; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-NEXT:    call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]])
@@ -957,16 +945,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU:       worker_state_machine.done.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU:       thread.user_code.check:
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -974,30 +962,28 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** [[TMP2]], i64 0)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP3]], i64 0)
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10:[0-9]+]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1009,18 +995,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1032,9 +1018,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1042,10 +1028,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
 ; AMDGPU-SAME: () #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU:       is_worker_check:
@@ -1055,25 +1041,25 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU:       worker_state_machine.begin:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU:       worker_state_machine.finished:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker_state_machine.is_active.check:
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.check:
-; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper
+; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper
 ; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-NEXT:    call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.check1:
-; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*)
+; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID
 ; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.execute2:
 ; AMDGPU-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
@@ -1089,16 +1075,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU:       worker_state_machine.done.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU:       thread.user_code.check:
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1106,19 +1092,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
 ; AMDGPU-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 0)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1127,10 +1112,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1138,19 +1122,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1162,9 +1145,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1172,10 +1155,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1183,10 +1165,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1194,10 +1175,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
 ; AMDGPU-SAME: () #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU:       is_worker_check:
@@ -1207,25 +1188,25 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU:       worker_state_machine.begin:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU:       worker_state_machine.finished:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker_state_machine.is_active.check:
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.check:
-; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*)
+; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
 ; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.check1:
-; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*)
+; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__8_wrapper.ID
 ; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.execute2:
 ; AMDGPU-NEXT:    call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]])
@@ -1237,16 +1218,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU:       worker_state_machine.done.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU:       thread.user_code.check:
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1254,29 +1235,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 0)
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* @__omp_outlined__8_wrapper.ID, i8** [[TMP3]], i64 0)
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1288,18 +1267,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1311,9 +1290,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1321,10 +1300,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
 ; AMDGPU-SAME: () #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU:       is_worker_check:
@@ -1334,19 +1313,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU:       worker_state_machine.begin:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU:       worker_state_machine.finished:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker_state_machine.is_active.check:
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.check:
-; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*)
+; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__10_wrapper.ID
 ; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-NEXT:    call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]])
@@ -1362,16 +1341,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU:       worker_state_machine.done.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU:       thread.user_code.check:
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1379,29 +1358,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* @__omp_outlined__10_wrapper.ID, i8** [[TMP2]], i64 0)
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* @__omp_outlined__11_wrapper.ID, i8** [[TMP3]], i64 0)
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1413,18 +1390,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1436,9 +1413,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1446,10 +1423,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
 ; AMDGPU-SAME: () #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU:       is_worker_check:
@@ -1459,19 +1436,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU:       worker_state_machine.begin:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU:       worker_state_machine.finished:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker_state_machine.is_active.check:
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.check:
-; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*)
+; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__13_wrapper.ID
 ; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-NEXT:    call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]])
@@ -1487,16 +1464,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU:       worker_state_machine.done.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU:       thread.user_code.check:
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1504,29 +1481,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* @__omp_outlined__13_wrapper.ID, i8** [[TMP2]], i64 0)
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* @__omp_outlined__14_wrapper.ID, i8** [[TMP3]], i64 0)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1538,18 +1513,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1561,9 +1536,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1571,10 +1546,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
 ; AMDGPU-SAME: () #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU:       is_worker_check:
@@ -1584,19 +1559,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU:       worker_state_machine.begin:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU:       worker_state_machine.finished:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker_state_machine.is_active.check:
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.check:
-; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper
+; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper
 ; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-NEXT:    call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]])
@@ -1608,15 +1583,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU:       worker_state_machine.done.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU:       thread.user_code.check:
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1624,11 +1599,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]]
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1638,14 +1613,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; AMDGPU-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; AMDGPU:       if.then:
 ; AMDGPU-NEXT:    br label [[RETURN:%.*]]
 ; AMDGPU:       if.end:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
@@ -1659,14 +1634,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; AMDGPU-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; AMDGPU:       if.then:
 ; AMDGPU-NEXT:    br label [[RETURN:%.*]]
 ; AMDGPU:       if.end:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
@@ -1679,10 +1654,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
 ; AMDGPU-SAME: () #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU:       is_worker_check:
@@ -1692,12 +1667,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU:       worker_state_machine.begin:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU:       worker_state_machine.finished:
 ; AMDGPU-NEXT:    ret void
@@ -1710,15 +1685,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU:       worker_state_machine.done.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU:       thread.user_code.check:
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1726,10 +1701,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @weak_callee_empty() #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1743,10 +1718,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1758,18 +1733,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1781,9 +1756,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1791,10 +1766,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1802,19 +1776,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1826,9 +1799,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1838,29 +1811,29 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; NVPTX-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; NVPTX-NEXT:    ret i32 0
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
 ; NVPTX-NEXT:    ret void
@@ -1870,16 +1843,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
 ; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; NVPTX-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; NVPTX-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; NVPTX:       omp_if.then:
-; NVPTX-NEXT:    store i32 0, i32* @G, align 4
-; NVPTX-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 0, ptr @G, align 4
+; NVPTX-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; NVPTX-NEXT:    br label [[OMP_IF_END]]
 ; NVPTX:       omp_if.end:
-; NVPTX-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1887,16 +1860,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; NVPTX-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; NVPTX-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; NVPTX:       omp_if.then:
-; NVPTX-NEXT:    store i32 0, i32* @G, align 4
-; NVPTX-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-NEXT:    store i32 0, ptr @G, align 4
+; NVPTX-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[OMP_IF_END]]
 ; NVPTX:       omp_if.end:
-; NVPTX-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1904,10 +1877,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
 ; NVPTX-SAME: () #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX:       is_worker_check:
@@ -1917,18 +1890,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX:       worker_state_machine.begin:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX:       worker_state_machine.finished:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker_state_machine.is_active.check:
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.check:
-; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*)
+; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__2_wrapper.ID
 ; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.execute:
 ; NVPTX-NEXT:    call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]])
@@ -1944,16 +1917,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX:       worker_state_machine.done.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX:       thread.user_code.check:
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -1961,30 +1934,28 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** [[TMP2]], i64 0)
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP3]], i64 0)
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10:[0-9]+]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -1996,18 +1967,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2019,9 +1990,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2029,10 +2000,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
 ; NVPTX-SAME: () #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX:       is_worker_check:
@@ -2042,24 +2013,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX:       worker_state_machine.begin:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX:       worker_state_machine.finished:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker_state_machine.is_active.check:
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.check:
-; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper
+; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper
 ; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.execute:
 ; NVPTX-NEXT:    call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.check1:
-; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*)
+; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID
 ; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.execute2:
 ; NVPTX-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
@@ -2075,16 +2046,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX:       worker_state_machine.done.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX:       thread.user_code.check:
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -2092,19 +2063,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
 ; NVPTX-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 0)
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2113,10 +2083,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2124,19 +2093,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2148,9 +2116,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2158,10 +2126,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2169,10 +2136,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2180,10 +2146,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
 ; NVPTX-SAME: () #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX:       is_worker_check:
@@ -2193,24 +2159,24 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX:       worker_state_machine.begin:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX:       worker_state_machine.finished:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker_state_machine.is_active.check:
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.check:
-; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*)
+; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
 ; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.execute:
 ; NVPTX-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.check1:
-; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*)
+; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__8_wrapper.ID
 ; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.execute2:
 ; NVPTX-NEXT:    call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]])
@@ -2222,16 +2188,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX:       worker_state_machine.done.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX:       thread.user_code.check:
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -2239,29 +2205,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 0)
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* @__omp_outlined__8_wrapper.ID, i8** [[TMP3]], i64 0)
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2273,18 +2237,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2296,9 +2260,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2306,10 +2270,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
 ; NVPTX-SAME: () #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX:       is_worker_check:
@@ -2319,18 +2283,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX:       worker_state_machine.begin:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX:       worker_state_machine.finished:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker_state_machine.is_active.check:
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.check:
-; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*)
+; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__10_wrapper.ID
 ; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.execute:
 ; NVPTX-NEXT:    call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]])
@@ -2346,16 +2310,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX:       worker_state_machine.done.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX:       thread.user_code.check:
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -2363,29 +2327,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* @__omp_outlined__10_wrapper.ID, i8** [[TMP2]], i64 0)
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* @__omp_outlined__11_wrapper.ID, i8** [[TMP3]], i64 0)
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2397,18 +2359,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2420,9 +2382,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2430,10 +2392,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
 ; NVPTX-SAME: () #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX:       is_worker_check:
@@ -2443,18 +2405,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX:       worker_state_machine.begin:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX:       worker_state_machine.finished:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker_state_machine.is_active.check:
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.check:
-; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*)
+; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__13_wrapper.ID
 ; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.execute:
 ; NVPTX-NEXT:    call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]])
@@ -2470,16 +2432,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX:       worker_state_machine.done.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX:       thread.user_code.check:
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -2487,29 +2449,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* @__omp_outlined__13_wrapper.ID, i8** [[TMP2]], i64 0)
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* @__omp_outlined__14_wrapper.ID, i8** [[TMP3]], i64 0)
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2521,18 +2481,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2544,9 +2504,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2554,10 +2514,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
 ; NVPTX-SAME: () #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX:       is_worker_check:
@@ -2567,18 +2527,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX:       worker_state_machine.begin:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX:       worker_state_machine.finished:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker_state_machine.is_active.check:
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.check:
-; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper
+; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper
 ; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.execute:
 ; NVPTX-NEXT:    call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]])
@@ -2590,15 +2550,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX:       worker_state_machine.done.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX:       thread.user_code.check:
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -2606,11 +2566,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]]
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2620,14 +2580,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; NVPTX-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; NVPTX:       if.then:
 ; NVPTX-NEXT:    br label [[RETURN:%.*]]
 ; NVPTX:       if.end:
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
@@ -2641,14 +2601,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; NVPTX-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; NVPTX:       if.then:
 ; NVPTX-NEXT:    br label [[RETURN:%.*]]
 ; NVPTX:       if.end:
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
@@ -2661,10 +2621,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
 ; NVPTX-SAME: () #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX:       is_worker_check:
@@ -2674,11 +2634,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX:       worker_state_machine.begin:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX:       worker_state_machine.finished:
 ; NVPTX-NEXT:    ret void
@@ -2691,15 +2651,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX:       worker_state_machine.done.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX:       thread.user_code.check:
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -2707,10 +2667,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @weak_callee_empty() #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2724,10 +2684,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2739,18 +2699,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2762,9 +2722,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2772,10 +2732,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2783,19 +2742,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2807,9 +2765,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2819,29 +2777,29 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker.exit:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; AMDGPU-DISABLED-NEXT:    ret i32 0
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
 ; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
 ; AMDGPU-DISABLED-NEXT:    ret void
@@ -2851,16 +2809,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
 ; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; AMDGPU-DISABLED-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; AMDGPU-DISABLED:       omp_if.then:
-; AMDGPU-DISABLED-NEXT:    store i32 0, i32* @G, align 4
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    store i32 0, ptr @G, align 4
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    br label [[OMP_IF_END]]
 ; AMDGPU-DISABLED:       omp_if.end:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -2868,16 +2826,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here
 ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; AMDGPU-DISABLED-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; AMDGPU-DISABLED:       omp_if.then:
-; AMDGPU-DISABLED-NEXT:    store i32 0, i32* @G, align 4
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    store i32 0, ptr @G, align 4
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; AMDGPU-DISABLED-NEXT:    br label [[OMP_IF_END]]
 ; AMDGPU-DISABLED:       omp_if.end:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -2887,14 +2845,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker.exit:
 ; AMDGPU-DISABLED-NEXT:    ret void
@@ -2902,30 +2860,28 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0)
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR10:[0-9]+]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -2937,18 +2893,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -2960,9 +2916,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -2972,14 +2928,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker.exit:
 ; AMDGPU-DISABLED-NEXT:    ret void
@@ -2987,19 +2943,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
 ; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3008,10 +2963,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
 ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3019,19 +2973,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
 ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3043,9 +2996,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3053,10 +3006,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
 ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3064,10 +3016,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
 ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3077,14 +3028,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker.exit:
 ; AMDGPU-DISABLED-NEXT:    ret void
@@ -3092,29 +3043,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0)
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3126,18 +3075,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3149,9 +3098,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3161,14 +3110,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker.exit:
 ; AMDGPU-DISABLED-NEXT:    ret void
@@ -3176,29 +3125,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0)
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3210,18 +3157,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3233,9 +3180,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3245,14 +3192,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker.exit:
 ; AMDGPU-DISABLED-NEXT:    ret void
@@ -3260,29 +3207,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0)
-; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3294,18 +3239,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3317,9 +3262,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3329,13 +3274,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker.exit:
 ; AMDGPU-DISABLED-NEXT:    ret void
@@ -3343,11 +3288,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3357,14 +3302,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; AMDGPU-DISABLED-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; AMDGPU-DISABLED:       if.then:
 ; AMDGPU-DISABLED-NEXT:    br label [[RETURN:%.*]]
 ; AMDGPU-DISABLED:       if.end:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
@@ -3378,14 +3323,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; AMDGPU-DISABLED-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; AMDGPU-DISABLED:       if.then:
 ; AMDGPU-DISABLED-NEXT:    br label [[RETURN:%.*]]
 ; AMDGPU-DISABLED:       if.end:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
@@ -3400,13 +3345,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker.exit:
 ; AMDGPU-DISABLED-NEXT:    ret void
@@ -3414,10 +3359,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @weak_callee_empty() #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3431,10 +3376,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3446,18 +3391,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3469,9 +3414,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3479,10 +3424,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
 ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3490,19 +3434,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
 ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
 ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
@@ -3514,9 +3457,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3526,29 +3469,29 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker.exit:
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; NVPTX-DISABLED-NEXT:    ret i32 0
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
 ; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -3558,16 +3501,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
 ; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; NVPTX-DISABLED-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; NVPTX-DISABLED:       omp_if.then:
-; NVPTX-DISABLED-NEXT:    store i32 0, i32* @G, align 4
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    store i32 0, ptr @G, align 4
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    br label [[OMP_IF_END]]
 ; NVPTX-DISABLED:       omp_if.end:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3575,16 +3518,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here
 ; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; NVPTX-DISABLED-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; NVPTX-DISABLED:       omp_if.then:
-; NVPTX-DISABLED-NEXT:    store i32 0, i32* @G, align 4
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    store i32 0, ptr @G, align 4
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; NVPTX-DISABLED-NEXT:    br label [[OMP_IF_END]]
 ; NVPTX-DISABLED:       omp_if.end:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3594,14 +3537,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker.exit:
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -3609,30 +3552,28 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0)
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR10:[0-9]+]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -3644,18 +3585,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -3667,9 +3608,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3679,14 +3620,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker.exit:
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -3694,19 +3635,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
 ; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -3715,10 +3655,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
 ; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3726,19 +3665,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
 ; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -3750,9 +3688,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3760,10 +3698,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
 ; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3771,10 +3708,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
 ; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3784,14 +3720,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker.exit:
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -3799,29 +3735,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0)
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -3833,18 +3767,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -3856,9 +3790,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3868,14 +3802,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker.exit:
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -3883,29 +3817,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0)
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -3917,18 +3849,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -3940,9 +3872,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -3952,14 +3884,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker.exit:
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -3967,29 +3899,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0)
-; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -4001,18 +3931,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -4024,9 +3954,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -4036,13 +3966,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker.exit:
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -4050,11 +3980,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -4064,14 +3994,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; NVPTX-DISABLED-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; NVPTX-DISABLED:       if.then:
 ; NVPTX-DISABLED-NEXT:    br label [[RETURN:%.*]]
 ; NVPTX-DISABLED:       if.end:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
@@ -4085,14 +4015,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; NVPTX-DISABLED-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; NVPTX-DISABLED:       if.then:
 ; NVPTX-DISABLED-NEXT:    br label [[RETURN:%.*]]
 ; NVPTX-DISABLED:       if.end:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
@@ -4107,13 +4037,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker.exit:
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -4121,10 +4051,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @weak_callee_empty() #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -4138,10 +4068,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -4153,18 +4083,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -4176,9 +4106,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -4186,10 +4116,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
 ; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
@@ -4197,19 +4126,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
 ; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 ;
 ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
@@ -4221,8 +4149,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
index 4fb9a4d55dcdc1..6dae1732b8c8a1 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
@@ -119,10 +119,10 @@
 ;;   { weak_callee_empty(); }
 ;; }
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1
 @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1
 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1
@@ -131,25 +131,25 @@
 @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1
 @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1
 @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1
-@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
 @G = external global i32, align 4
-@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
-@llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, i8* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, i8* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata"
+@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8
+@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata"
 
 define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 {
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
@@ -157,14 +157,14 @@ worker.exit:                                      ; preds = %entry
 }
 
 ; Make it a declaration so we will *not* apply custom state machine rewriting and wait for LTO.
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1);
+declare i32 @__kmpc_target_init(ptr, i8, i1);
 
-define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @no_parallel_region_in_here() #7
   call void @unknown_no_openmp() #8
   ret void
@@ -172,72 +172,70 @@ entry:
 
 define hidden void @no_parallel_region_in_here() #1 {
 entry:
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = call i32 @__kmpc_single(%struct.ident_t* @2, i32 %0)
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  %1 = call i32 @__kmpc_single(ptr @2, i32 %0)
   %2 = icmp ne i32 %1, 0
   br i1 %2, label %omp_if.then, label %omp_if.end
 
 omp_if.then:                                      ; preds = %entry
-  store i32 0, i32* @G, align 4
-  call void @__kmpc_end_single(%struct.ident_t* @2, i32 %0)
+  store i32 0, ptr @G, align 4
+  call void @__kmpc_end_single(ptr @2, i32 %0)
   br label %omp_if.end
 
 omp_if.end:                                       ; preds = %omp_if.then, %entry
-  call void @__kmpc_barrier(%struct.ident_t* @3, i32 %0)
+  call void @__kmpc_barrier(ptr @3, i32 %0)
   ret void
 }
 
 declare void @unknown_no_openmp() #2
 
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
+declare i32 @__kmpc_global_thread_num(ptr) #3
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 {
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %captured_vars_addrs1 = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @unknown_no_openmp() #8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %2, i64 0)
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs, i64 0)
   call void @no_parallel_region_in_here() #7
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %3, i64 0)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -249,25 +247,25 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
-declare void @__kmpc_get_shared_variables(i8***)
+declare void @__kmpc_get_shared_variables(ptr)
 
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
 
-define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -279,12 +277,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -292,55 +290,53 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedur
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @unknown_no_openmp() #8
   call void @simple_state_machine_interprocedural_before() #7
   call void @no_parallel_region_in_here() #7
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 0)
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 0)
   call void @simple_state_machine_interprocedural_after() #7
   ret void
 }
 
 define hidden void @simple_state_machine_interprocedural_before() #1 {
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** %1, i64 0)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -350,21 +346,20 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
 define hidden void @simple_state_machine_interprocedural_after() #1 {
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** %1, i64 0)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0)
   ret void
 }
 
@@ -372,46 +367,44 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 0)
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %captured_vars_addrs1 = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 0)
   %call = call i32 @unknown() #7
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** %3, i64 0)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -421,23 +414,23 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
 declare i32 @unknown() #4
 
-define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -447,12 +440,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__8(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__8(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -460,46 +453,44 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_att
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** %2, i64 0)
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %captured_vars_addrs1 = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs, i64 0)
   call void @unknown_no_openmp() #8
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** %3, i64 0)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__10(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -509,21 +500,21 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__10(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__10(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
-define internal void @__omp_outlined__11(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -533,12 +524,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__11(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__11(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -546,47 +537,45 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77() #0
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__12(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %captured_vars_addrs1 = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @unknown_no_openmp() #8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** %2, i64 0)
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs, i64 0)
   call void @unknown_pure() #9
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** %3, i64 0)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__13(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -596,23 +585,23 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__13(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__13(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
 declare void @unknown_pure() #5
 
-define internal void @__omp_outlined__14(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p1() #7
   ret void
 }
@@ -622,12 +611,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__14(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__14(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -635,29 +624,29 @@ define weak void @__omp_offloading_14_a36502b_simple_state_machine_interprocedur
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__15(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %call = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #7
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+  %call = call i32 @omp_get_thread_num() #7
   call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7
   ret void
 }
@@ -665,8 +654,8 @@ entry:
 define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  %0 = load i32, i32* %a.addr, align 4
+  store i32 %a, ptr %a.addr, align 4
+  %0 = load i32, ptr %a.addr, align 4
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -674,7 +663,7 @@ if.then:                                          ; preds = %entry
   br label %return
 
 if.end:                                           ; preds = %entry
-  %1 = load i32, i32* %a.addr, align 4
+  %1 = load i32, ptr %a.addr, align 4
   %sub = sub nsw i32 %1, 1
   call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7
   call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7
@@ -690,28 +679,28 @@ define weak void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__16(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @weak_callee_empty() #7
   ret void
 }
@@ -721,18 +710,18 @@ entry:
   ret void
 }
 
-declare i32 @__kmpc_single(%struct.ident_t*, i32) #6
+declare i32 @__kmpc_single(ptr, i32) #6
 
-declare void @__kmpc_end_single(%struct.ident_t*, i32) #6
+declare void @__kmpc_end_single(ptr, i32) #6
 
-declare void @__kmpc_barrier(%struct.ident_t*, i32) #6
+declare void @__kmpc_barrier(ptr, i32) #6
 
-define internal void @__omp_outlined__17(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -742,21 +731,21 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__17(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__17(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
-define internal void @__omp_outlined__18(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -766,30 +755,29 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__18(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__18(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
 define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 {
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** %1, i64 0)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__19(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @p0() #7
   ret void
 }
@@ -799,12 +787,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__19(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__19(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
@@ -831,14 +819,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 !5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
 !6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
 !7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-!8 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-!9 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-!10 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-!11 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-!12 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-!13 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-!14 = !{void ()* @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-!15 = !{void ()* @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
+!8 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
+!9 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
+!10 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
+!11 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
+!12 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
+!13 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
+!14 = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
+!15 = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
 !16 = !{i32 1, !"wchar_size", i32 4}
 !17 = !{i32 7, !"openmp", i32 50}
 !18 = !{i32 7, !"openmp-device", i32 50}
@@ -848,13 +836,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -862,10 +850,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
 ; AMDGPU-NEXT:    ret void
@@ -875,16 +863,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
 ; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; AMDGPU-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; AMDGPU:       omp_if.then:
-; AMDGPU-NEXT:    store i32 0, i32* @G, align 4
-; AMDGPU-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 0, ptr @G, align 4
+; AMDGPU-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    br label [[OMP_IF_END]]
 ; AMDGPU:       omp_if.end:
-; AMDGPU-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -892,16 +880,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; AMDGPU-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; AMDGPU:       omp_if.then:
-; AMDGPU-NEXT:    store i32 0, i32* @G, align 4
-; AMDGPU-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-NEXT:    store i32 0, ptr @G, align 4
+; AMDGPU-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[OMP_IF_END]]
 ; AMDGPU:       omp_if.end:
-; AMDGPU-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -911,14 +899,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -926,30 +914,28 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0)
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10:[0-9]+]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -961,18 +947,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -984,9 +970,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -996,14 +982,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1011,19 +997,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
 ; AMDGPU-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1032,10 +1017,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1043,19 +1027,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1067,9 +1050,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1077,10 +1060,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1088,10 +1070,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1101,14 +1082,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1116,29 +1097,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0)
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0)
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1150,18 +1129,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1173,9 +1152,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1185,14 +1164,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1200,29 +1179,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0)
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0)
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1234,18 +1211,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1257,9 +1234,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1269,14 +1246,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; AMDGPU-NEXT:    call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; AMDGPU-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1284,29 +1261,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0)
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1318,18 +1293,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p1() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1341,9 +1316,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1353,13 +1328,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1367,11 +1342,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]]
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1381,14 +1356,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; AMDGPU-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; AMDGPU:       if.then:
 ; AMDGPU-NEXT:    br label [[RETURN:%.*]]
 ; AMDGPU:       if.end:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
@@ -1402,14 +1377,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; AMDGPU-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; AMDGPU:       if.then:
 ; AMDGPU-NEXT:    br label [[RETURN:%.*]]
 ; AMDGPU:       if.end:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
 ; AMDGPU-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
@@ -1424,13 +1399,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker.exit:
 ; AMDGPU-NEXT:    ret void
@@ -1438,10 +1413,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @weak_callee_empty() #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1455,10 +1430,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1470,18 +1445,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1493,9 +1468,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1503,10 +1478,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1514,19 +1488,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
 ; AMDGPU-SAME: () #[[ATTR1]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    ret void
 ;
 ;
 ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; AMDGPU-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT:    call void @p0() #[[ATTR10]]
 ; AMDGPU-NEXT:    ret void
 ;
@@ -1538,9 +1511,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; AMDGPU-NEXT:    ret void
 ;
 ;
@@ -1550,13 +1523,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -1564,10 +1537,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
 ; NVPTX-NEXT:    ret void
@@ -1577,16 +1550,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
 ; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; NVPTX-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; NVPTX-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; NVPTX:       omp_if.then:
-; NVPTX-NEXT:    store i32 0, i32* @G, align 4
-; NVPTX-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 0, ptr @G, align 4
+; NVPTX-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
 ; NVPTX-NEXT:    br label [[OMP_IF_END]]
 ; NVPTX:       omp_if.end:
-; NVPTX-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1594,16 +1567,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; NVPTX-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; NVPTX-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
 ; NVPTX:       omp_if.then:
-; NVPTX-NEXT:    store i32 0, i32* @G, align 4
-; NVPTX-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-NEXT:    store i32 0, ptr @G, align 4
+; NVPTX-NEXT:    call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[OMP_IF_END]]
 ; NVPTX:       omp_if.end:
-; NVPTX-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1613,14 +1586,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -1628,30 +1601,28 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0)
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0)
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10:[0-9]+]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -1663,18 +1634,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -1686,9 +1657,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1698,14 +1669,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -1713,19 +1684,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
 ; NVPTX-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0)
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -1734,10 +1704,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1745,19 +1714,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -1769,9 +1737,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1779,10 +1747,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1790,10 +1757,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1803,14 +1769,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -1818,29 +1784,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0)
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0)
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -1852,18 +1816,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -1875,9 +1839,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1887,14 +1851,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -1902,29 +1866,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0)
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0)
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -1936,18 +1898,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -1959,9 +1921,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -1971,14 +1933,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; NVPTX-NEXT:    call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
+; NVPTX-NEXT:    call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -1986,29 +1948,27 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT:    call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0)
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0)
+; NVPTX-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2020,18 +1980,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p1() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2043,9 +2003,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2055,13 +2015,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -2069,11 +2029,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]]
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2083,14 +2043,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; NVPTX-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; NVPTX:       if.then:
 ; NVPTX-NEXT:    br label [[RETURN:%.*]]
 ; NVPTX:       if.end:
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
@@ -2104,14 +2064,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; NVPTX-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; NVPTX:       if.then:
 ; NVPTX-NEXT:    br label [[RETURN:%.*]]
 ; NVPTX:       if.end:
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
 ; NVPTX-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
@@ -2126,13 +2086,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker.exit:
 ; NVPTX-NEXT:    ret void
@@ -2140,10 +2100,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @weak_callee_empty() #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2157,10 +2117,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2172,18 +2132,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2195,9 +2155,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2205,10 +2165,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
@@ -2216,19 +2175,18 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
 ; NVPTX-SAME: () #[[ATTR1]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    ret void
 ;
 ;
 ; NVPTX: Function Attrs: convergent noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; NVPTX-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    call void @p0() #[[ATTR10]]
 ; NVPTX-NEXT:    ret void
 ;
@@ -2240,8 +2198,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
 ; NVPTX-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
index d4e33c87aaf2f7..e1b7b37f3bcb66 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
@@ -35,32 +35,32 @@ target triple = "nvptx64"
 ;;   }
 ;; }
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @2 = private unnamed_addr constant [82 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_fallback;11;1;;\00", align 1
-@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([82 x i8], [82 x i8]* @2, i32 0, i32 0) }, align 8
+@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8
 @4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
-@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([114 x i8], [114 x i8]* @4, i32 0, i32 0) }, align 8
+@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8
 @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
 @6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
-@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([116 x i8], [116 x i8]* @6, i32 0, i32 0) }, align 8
+@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8
 @8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1
-@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([85 x i8], [85 x i8]* @8, i32 0, i32 0) }, align 8
+@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8
 @10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
-@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([117 x i8], [117 x i8]* @10, i32 0, i32 0) }, align 8
+@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8
 @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
 @12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1
-@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([73 x i8], [73 x i8]* @12, i32 0, i32 0) }, align 8
+@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8
 @G = external global i32
-@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, i8* @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
+@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
 
 ; Function Attrs: convergent norecurse nounwind
 define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
 entry:
-  %captured_vars_addrs.i.i = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18
+  %captured_vars_addrs.i.i = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18
   %exec_user_code = icmp eq i32 %0, -1, !dbg !18
   br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
 
@@ -68,21 +68,19 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void, !dbg !19
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @3) #3
+  %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @3) #3
   call void @unknown() #6, !dbg !20
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i.i, i64 0, i64 0, !dbg !23
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
+  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3
+  %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
+  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23
+  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26
   call void @unknown() #6, !dbg !27
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28
+  call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28
   br label %common.ret
 }
 
 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
   ret i32 0
 }
 
@@ -92,23 +90,22 @@ declare void @unknown() local_unnamed_addr #1
 ; Function Attrs: nounwind
 define hidden void @known() local_unnamed_addr #2 !dbg !29 {
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @13)
-  %1 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0, !dbg !30
-  call void @__kmpc_parallel_51(%struct.ident_t* nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** nonnull %1, i64 0) #3, !dbg !30
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @13)
+  call void @__kmpc_parallel_51(ptr nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3, !dbg !30
   ret void, !dbg !31
 }
 
 ; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
+declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
+declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
 
 ; Function Attrs: norecurse nounwind
 define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
 entry:
-  %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33
+  %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33
   %exec_user_code = icmp eq i32 %0, -1, !dbg !33
   br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
 
@@ -116,29 +113,27 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void, !dbg !34
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @9) #3
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i2.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i2.i, i64 0, i64 0, !dbg !35
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !35
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !39
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %5, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !40
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !42
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
+  %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @9) #3
+  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
+  %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
+  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !35
+  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !39
+  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
+  %3 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
+  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !40
+  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !42
+  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
+  %4 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
+  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43
+  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45
   call void @no_openmp()
   call void @no_parallelism()
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46
+  call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46
   br label %common.ret
 }
 
 ; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__2(i32* noalias nocapture nofree readnone %.global_tid., i32* noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
+define internal void @__omp_outlined__2(ptr noalias nocapture nofree readnone %.global_tid., ptr noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
 entry:
   call void @unknown() #6, !dbg !48
   ret void, !dbg !49
@@ -147,21 +142,21 @@ entry:
 ; Function Attrs: convergent norecurse nounwind
 define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 {
 entry:
-  %global_args = alloca i8**, align 8
-  call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3, !dbg !51
+  %global_args = alloca ptr, align 8
+  call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3, !dbg !51
   call void @unknown() #6, !dbg !52
   ret void, !dbg !51
 }
 
-declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
+declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr
 
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5
 
 declare void @no_openmp() #7
 declare void @no_parallelism() #8
@@ -187,8 +182,8 @@ attributes #8 = { "llvm.assume"="omp_no_parallelism" }
 !2 = !{}
 !3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
 !4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
-!5 = !{void ()* @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
-!6 = !{void ()* @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
+!5 = !{ptr @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
+!6 = !{ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
 !7 = !{i32 7, !"Dwarf Version", i32 2}
 !8 = !{i32 2, !"Debug Info Version", i32 3}
 !9 = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/OpenMP/dead_use.ll b/llvm/test/Transforms/OpenMP/dead_use.ll
index c116b620b8f078..b3f5194b10fc36 100644
--- a/llvm/test/Transforms/OpenMP/dead_use.ll
+++ b/llvm/test/Transforms/OpenMP/dead_use.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
 ; RUN: opt -S -passes=openmp-opt-cgscc < %s | FileCheck %s
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8
 
 ; Function Attrs: nounwind uwtable
 define dso_local i32 @b() #0 {
@@ -11,12 +11,12 @@ define dso_local i32 @b() #0 {
 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @a()
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %1 = alloca i32, align 4
   %2 = call i32 @a()
-  %3 = load i32, i32* %1, align 4
+  %3 = load i32, ptr %1, align 4
   ret i32 %3
 }
 
@@ -26,36 +26,36 @@ define internal i32 @a() #0 {
 ; CHECK-SAME: () #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @b()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB0:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB0:[0-9]+]], i32 0, ptr @.omp_outlined.)
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %1 = alloca i32, align 4
   %2 = call i32 @b()
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
-  %3 = load i32, i32* %1, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @0, i32 0, ptr @.omp_outlined.)
+  %3 = load i32, ptr %1, align 4
   ret i32 %3
 }
 
 ; Function Attrs: norecurse nounwind uwtable
-define internal void @.omp_outlined.(i32* noalias %0, i32* noalias %1) #1 {
+define internal void @.omp_outlined.(ptr noalias %0, ptr noalias %1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* noalias [[TMP0:%.*]], i32* noalias [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[TMP0]], i32** [[TMP3]], align 8, !tbaa [[TBAA2:![0-9]+]]
-; CHECK-NEXT:    store i32* [[TMP1]], i32** [[TMP4]], align 8, !tbaa [[TBAA2]]
+; CHECK-SAME: (ptr noalias [[TMP0:%.*]], ptr noalias [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP3]], align 8, !tbaa [[TBAA2:![0-9]+]]
+; CHECK-NEXT:    store ptr [[TMP1]], ptr [[TMP4]], align 8, !tbaa [[TBAA2]]
 ; CHECK-NEXT:    ret void
 ;
-  %3 = alloca i32*, align 8
-  %4 = alloca i32*, align 8
-  store i32* %0, i32** %3, align 8, !tbaa !2
-  store i32* %1, i32** %4, align 8, !tbaa !2
+  %3 = alloca ptr, align 8
+  %4 = alloca ptr, align 8
+  store ptr %0, ptr %3, align 8, !tbaa !2
+  store ptr %1, ptr %4, align 8, !tbaa !2
   ret void
 }
 
 ; Function Attrs: nounwind
-declare !callback !6 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #2
+declare !callback !6 void @__kmpc_fork_call(ptr, i32, ptr, ...) #2
 
 attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll
index 070b027ae4ccb8..df5c7737ee39c1 100644
--- a/llvm/test/Transforms/OpenMP/deduplication.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication.ll
@@ -2,33 +2,33 @@
 ; RUN: opt -passes=openmp-opt-cgscc -S < %s | FileCheck %s
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
-@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8
-@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8
+@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, ptr @.str0 }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str1 }, align 8
+@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str2 }, align 8
 @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
 @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1
 @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1
 
 ; UTC_ARGS: --disable
-; CHECK-DAG: @0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8
-; CHECK-DAG: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8
-; CHECK-DAG: @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8
+; CHECK-DAG: @0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 34, i32 0, i32 0, ptr @.str0 }, align 8
+; CHECK-DAG: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str1 }, align 8
+; CHECK-DAG: @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str2 }, align 8
 ; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
 ; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1
 ; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1
 ; UTC_ARGS: --enable
 
 
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+declare i32 @__kmpc_global_thread_num(ptr)
 declare void @useI32(i32)
 
 define void @external(i1 %c) {
 ; CHECK-LABEL: define {{[^@]+}}@external
 ; CHECK-SAME: (i1 [[C:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB0:[0-9]+]])
+; CHECK-NEXT:    [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB0:[0-9]+]])
 ; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[E:%.*]]
 ; CHECK:       t:
 ; CHECK-NEXT:    call void @internal(i32 [[C2]], i32 [[C2]])
@@ -46,17 +46,17 @@ define void @external(i1 %c) {
 entry:
   br i1 %c, label %t, label %e
 t:
-  %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c0 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0)
   call void @internal(i32 %c0, i32 %c0)
   call void @useI32(i32 %c0)
   br label %m
 e:
-  %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c1 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0)
   call void @internal(i32 %c1, i32 %c1)
   call void @useI32(i32 %c1)
   br label %m
 m:
-  %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c2 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0)
   call void @internal(i32 0, i32 %c2)
   call void @useI32(i32 %c2)
   ret void
@@ -80,20 +80,20 @@ define internal void @internal(i32 %not_gtid, i32 %gtid) {
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %cc = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %cc = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0)
   %c = icmp eq i32 %cc, %gtid
   br i1 %c, label %t, label %e
 t:
-  %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c0 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0)
   call void @useI32(i32 %c0)
   call void @external(i1 %c)
   br label %m
 e:
-  %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c1 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0)
   call void @useI32(i32 %c1)
   br label %m
 m:
-  %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c2 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0)
   call void @useI32(i32 %c2)
   ret void
 }
@@ -102,7 +102,7 @@ m:
 define void @local_and_global_gtid_calls() {
 ; CHECK-LABEL: define {{[^@]+}}@local_and_global_gtid_calls() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+; CHECK-NEXT:    [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]])
 ; CHECK-NEXT:    [[DOTKMPC_LOC_ADDR:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8
 ; CHECK-NEXT:    call void @useI32(i32 [[TID5]])
 ; CHECK-NEXT:    call void @useI32(i32 [[TID5]])
@@ -114,15 +114,15 @@ define void @local_and_global_gtid_calls() {
 ;
 entry:
   %.kmpc_loc.addr = alloca %struct.ident_t, align 8
-  %tid0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr)
-  %tid1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  %tid2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr)
+  %tid0 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr)
+  %tid1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  %tid2 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr)
   call void @useI32(i32 %tid0)
   call void @useI32(i32 %tid1)
   call void @useI32(i32 %tid2)
-  %tid3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr)
-  %tid4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %tid5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr)
+  %tid3 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr)
+  %tid4 = call i32 @__kmpc_global_thread_num(ptr @2)
+  %tid5 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr)
   call void @useI32(i32 %tid3)
   call void @useI32(i32 %tid4)
   call void @useI32(i32 %tid5)
@@ -132,7 +132,7 @@ entry:
 define void @local_gtid_calls_only() {
 ; CHECK-LABEL: define {{[^@]+}}@local_gtid_calls_only() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
+; CHECK-NEXT:    [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]])
 ; CHECK-NEXT:    [[DOTKMPC_LOC_ADDR1:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8
 ; CHECK-NEXT:    [[DOTKMPC_LOC_ADDR2:%.*]] = alloca [[STRUCT_IDENT_T]], align 8
 ; CHECK-NEXT:    [[DOTKMPC_LOC_ADDR3:%.*]] = alloca [[STRUCT_IDENT_T]], align 8
@@ -148,15 +148,15 @@ entry:
   %.kmpc_loc.addr1 = alloca %struct.ident_t, align 8
   %.kmpc_loc.addr2 = alloca %struct.ident_t, align 8
   %.kmpc_loc.addr3 = alloca %struct.ident_t, align 8
-  %tid0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr1)
-  %tid1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr2)
-  %tid2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr3)
+  %tid0 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr1)
+  %tid1 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr2)
+  %tid2 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr3)
   call void @useI32(i32 %tid0)
   call void @useI32(i32 %tid1)
   call void @useI32(i32 %tid2)
-  %tid3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr1)
-  %tid4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr2)
-  %tid5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr3)
+  %tid3 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr1)
+  %tid4 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr2)
+  %tid5 = call i32 @__kmpc_global_thread_num(ptr %.kmpc_loc.addr3)
   call void @useI32(i32 %tid3)
   call void @useI32(i32 %tid4)
   call void @useI32(i32 %tid5)
diff --git a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll
index a97f4da66f04f8..1b0f00603ff451 100644
--- a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll
@@ -4,24 +4,24 @@ source_filename = "deduplication_remarks.c"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-linux-gnu"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
-@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8
+@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, ptr @.str0 }, align 8
 @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
 
 ; CHECK: remark: deduplication_remarks.c:7:10: OpenMP runtime call __kmpc_global_thread_num deduplicated
 ; CHECK: remark: deduplication_remarks.c:9:10: OpenMP runtime call __kmpc_global_thread_num deduplicated
 define dso_local void @deduplicate() local_unnamed_addr !dbg !14 {
-  %1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !21
+  %1 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0), !dbg !21
   call void @useI32(i32 %1), !dbg !23
-  %2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !24
+  %2 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0), !dbg !24
   call void @useI32(i32 %2), !dbg !25
-  %3 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !26
+  %3 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @0), !dbg !26
   call void @useI32(i32 %3), !dbg !27
   ret void, !dbg !28
 }
 
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+declare i32 @__kmpc_global_thread_num(ptr)
 
 declare !dbg !4 void @useI32(i32) local_unnamed_addr
 
diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll
index cf83e7900a1748..bfb9e3bc457a03 100644
--- a/llvm/test/Transforms/OpenMP/deduplication_target.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll
@@ -4,13 +4,13 @@
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
-@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
+@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, ptr @0 }, align 8
 @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode = weak constant i8 0
-@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode], section "llvm.metadata"
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode], section "llvm.metadata"
 
 declare void @use(i32)
 
@@ -18,38 +18,38 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_50_a3e09bf8_foo_l2
 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false)
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
+; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK:       user_code.entry:
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; CHECK-NEXT:    ret void
 ; CHECK:       worker.exit:
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 false)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @2)
+  %2 = call i32 @__kmpc_global_thread_num(ptr @2)
+  call void @__kmpc_target_deinit(ptr @1, i8 2)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)
+declare i32 @__kmpc_target_init(ptr, i8, i1)
 
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #1
+declare i32 @__kmpc_global_thread_num(ptr) #1
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
 attributes #1 = { nounwind }
@@ -59,7 +59,7 @@ attributes #1 = { nounwind }
 !llvm.module.flags = !{!2, !3, !4}
 
 !0 = !{i32 0, i32 80, i32 -1545561096, !"foo", i32 2, i32 0}
-!1 = !{void ()* @__omp_offloading_50_a3e09bf8_foo_l2, !"kernel", i32 1}
+!1 = !{ptr @__omp_offloading_50_a3e09bf8_foo_l2, !"kernel", i32 1}
 !2 = !{i32 1, !"wchar_size", i32 4}
 !3 = !{i32 7, !"openmp", i32 50}
 !4 = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
index 552f941ba90d08..5376a88d554732 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
 target triple = "nvptx64"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @kernel0_exec_mode = weak constant i8 1
 
@@ -13,12 +13,12 @@ target triple = "nvptx64"
 ; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
 ;.
 define weak void @kernel0() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@kernel0
 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
 ; CHECK-NEXT:    [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
 ; CHECK-NEXT:    [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
 ; CHECK-NEXT:    br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@@ -28,15 +28,15 @@ define weak void @kernel0() #0 {
 ; CHECK-NEXT:    call void @helper0() #[[ATTR1:[0-9]+]]
 ; CHECK-NEXT:    call void @helper1() #[[ATTR1]]
 ; CHECK-NEXT:    call void @helper2() #[[ATTR1]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 2)
 ; CHECK-NEXT:    ret void
 ;
 
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
   call void @helper0()
   call void @helper1()
   call void @helper2()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+  call void @__kmpc_target_deinit(ptr null, i8 1)
   ret void
 }
 
@@ -45,7 +45,7 @@ define weak void @kernel0() #0 {
 define weak void @kernel1() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@kernel1
 ; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
 ; CHECK-NEXT:    [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
 ; CHECK-NEXT:    [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
 ; CHECK-NEXT:    br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@@ -53,13 +53,13 @@ define weak void @kernel1() #0 {
 ; CHECK-NEXT:    ret void
 ; CHECK:       main.thread.user_code:
 ; CHECK-NEXT:    call void @helper1() #[[ATTR1]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 2)
 ; CHECK-NEXT:    ret void
 ;
 
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
   call void @helper1()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+  call void @__kmpc_target_deinit(ptr null, i8 1)
   ret void
 }
 
@@ -69,25 +69,24 @@ define weak void @kernel2() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@kernel2
 ; CHECK-SAME: () #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; CHECK:       common.ret:
 ; CHECK-NEXT:    ret void
 ; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) #[[ATTR1]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR1]]
 ; CHECK-NEXT:    call void @helper0() #[[ATTR1]]
 ; CHECK-NEXT:    call void @helper1() #[[ATTR1]]
 ; CHECK-NEXT:    call void @helper2() #[[ATTR1]]
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0)
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 2)
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %i, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -95,13 +94,12 @@ common.ret:
   ret void
 
 user_code.entry:
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
+  %0 = call i32 @__kmpc_global_thread_num(ptr null)
   call void @helper0()
   call void @helper1()
   call void @helper2()
-  call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0)
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+  call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
+  call void @__kmpc_target_deinit(ptr null, i8 1)
   ret void
 }
 
@@ -114,18 +112,18 @@ define internal void @helper0() {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; CHECK:       region.guarded:
-; CHECK-NEXT:    store i32 666, i32* @G, align 4
+; CHECK-NEXT:    store i32 666, ptr @G, align 4
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
 ; CHECK:       region.guarded.end:
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
 ; CHECK:       region.exit:
 ; CHECK-NEXT:    ret void
 ;
   %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
-  store i32 %threadLimit, i32* @G
+  store i32 %threadLimit, ptr @G
   ret void
 }
 
@@ -157,24 +155,24 @@ define internal void @helper2() {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; CHECK:       region.guarded:
-; CHECK-NEXT:    store i32 666, i32* @G, align 4
+; CHECK-NEXT:    store i32 666, ptr @G, align 4
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
 ; CHECK:       region.guarded.end:
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
 ; CHECK:       region.exit:
 ; CHECK-NEXT:    ret void
 ;
   %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
-  store i32 %threadLimit, i32* @G
+  store i32 %threadLimit, ptr @G
   ret void
 }
 
-define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
-; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) {
+; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret void
 ;
@@ -202,10 +200,10 @@ define internal i32 @__kmpc_get_hardware_num_threads_in_block() {
   ret i32 %ret
 }
 declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext) #1
-declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8) #1
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext) #1
+declare void @__kmpc_target_deinit(ptr nocapture readnone, i8) #1
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
+declare i32 @__kmpc_global_thread_num(ptr)
 
 
 !llvm.module.flags = !{!0, !1}
@@ -215,9 +213,9 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
 
 !0 = !{i32 7, !"openmp", i32 50}
 !1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{void ()* @kernel0, !"kernel", i32 1}
-!3 = !{void ()* @kernel1, !"kernel", i32 1}
-!4 = !{void ()* @kernel2, !"kernel", i32 1}
+!2 = !{ptr @kernel0, !"kernel", i32 1}
+!3 = !{ptr @kernel1, !"kernel", i32 1}
+!4 = !{ptr @kernel2, !"kernel", i32 1}
 ;
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
@@ -227,7 +225,7 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel0, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{void ()* @kernel1, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{void ()* @kernel2, !"kernel", i32 1}
+; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1}
+; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1}
+; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
 ;.
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
index a4ccea5a4e69b1..591892f03755a3 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
 target triple = "nvptx64"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @kernel0_exec_mode = weak constant i8 1
 
@@ -16,18 +16,18 @@ target triple = "nvptx64"
 define weak void @kernel0() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@kernel0
 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
 ; CHECK-NEXT:    call void @helper0()
 ; CHECK-NEXT:    call void @helper1()
 ; CHECK-NEXT:    call void @helper2()
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i1 true)
 ; CHECK-NEXT:    ret void
 ;
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
   call void @helper0()
   call void @helper1()
   call void @helper2()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true)
+  call void @__kmpc_target_deinit(ptr null, i1 true)
   ret void
 }
 
@@ -36,14 +36,14 @@ define weak void @kernel0() #0 {
 define weak void @kernel1() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@kernel1
 ; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
 ; CHECK-NEXT:    call void @helper1()
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i1 false)
 ; CHECK-NEXT:    ret void
 ;
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
   call void @helper1()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
+  call void @__kmpc_target_deinit(ptr null, i1 false)
   ret void
 }
 
@@ -52,18 +52,18 @@ define weak void @kernel1() #0 {
 define weak void @kernel2() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@kernel2
 ; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false)
 ; CHECK-NEXT:    call void @helper0()
 ; CHECK-NEXT:    call void @helper1()
 ; CHECK-NEXT:    call void @helper2()
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i1 false)
 ; CHECK-NEXT:    ret void
 ;
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false)
   call void @helper0()
   call void @helper1()
   call void @helper2()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
+  call void @__kmpc_target_deinit(ptr null, i1 false)
   ret void
 }
 
@@ -71,11 +71,11 @@ define internal void @helper0() {
 ; CHECK-LABEL: define {{[^@]+}}@helper0
 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:    [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]]
-; CHECK-NEXT:    store i32 [[THREADLIMIT]], i32* @G, align 4
+; CHECK-NEXT:    store i32 [[THREADLIMIT]], ptr @G, align 4
 ; CHECK-NEXT:    ret void
 ;
   %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
-  store i32 %threadLimit, i32* @G
+  store i32 %threadLimit, ptr @G
   ret void
 }
 
@@ -103,17 +103,17 @@ f:
 define internal void @helper2() {
 ; CHECK-LABEL: define {{[^@]+}}@helper2() {
 ; CHECK-NEXT:    [[THREADLIMIT:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR1]]
-; CHECK-NEXT:    store i32 [[THREADLIMIT]], i32* @G, align 4
+; CHECK-NEXT:    store i32 [[THREADLIMIT]], ptr @G, align 4
 ; CHECK-NEXT:    ret void
 ;
   %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
-  store i32 %threadLimit, i32* @G
+  store i32 %threadLimit, ptr @G
   ret void
 }
 
 declare i32 @__kmpc_get_hardware_num_threads_in_block()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext) #1
-declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext) #1
+declare i32 @__kmpc_target_init(ptr, i1 zeroext, i1 zeroext) #1
+declare void @__kmpc_target_deinit(ptr nocapture readnone, i1 zeroext) #1
 
 
 !llvm.module.flags = !{!0, !1}
@@ -123,9 +123,9 @@ attributes #0 = { optnone noinline "omp_target_thread_limit"="666" "omp_target_n
 
 !0 = !{i32 7, !"openmp", i32 50}
 !1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{void ()* @kernel0, !"kernel", i32 1}
-!3 = !{void ()* @kernel1, !"kernel", i32 1}
-!4 = !{void ()* @kernel2, !"kernel", i32 1}
+!2 = !{ptr @kernel0, !"kernel", i32 1}
+!3 = !{ptr @kernel1, !"kernel", i32 1}
+!4 = !{ptr @kernel2, !"kernel", i32 1}
 ;
 ;.
 ; CHECK: attributes #[[ATTR0]] = { noinline optnone "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
@@ -133,7 +133,7 @@ attributes #0 = { optnone noinline "omp_target_thread_limit"="666" "omp_target_n
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel0, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{void ()* @kernel1, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{void ()* @kernel2, !"kernel", i32 1}
+; CHECK: [[META2:![0-9]+]] = !{ptr @kernel0, !"kernel", i32 1}
+; CHECK: [[META3:![0-9]+]] = !{ptr @kernel1, !"kernel", i32 1}
+; CHECK: [[META4:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
 ;.
diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll
index 8cfc97aff70931..1c304ce743eda1 100644
--- a/llvm/test/Transforms/OpenMP/global_constructor.ll
+++ b/llvm/test/Transforms/OpenMP/global_constructor.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @_ZL6Device = internal global double 0.000000e+00, align 8
 @__omp_offloading_fd02_85283c04_main_l11_exec_mode = weak constant i8 0
 
-define weak void @__omp_offloading_fd02_85283c04_main_l11(double* nonnull align 8 dereferenceable(8) %X) local_unnamed_addr {
+define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr {
 entry:
-  %0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false) #0
+  %0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #0
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -18,31 +18,31 @@ common.ret:
   ret void
 
 user_code.entry:
-  %1 = load double, double* @_ZL6Device, align 8, !tbaa !11
+  %1 = load double, ptr @_ZL6Device, align 8, !tbaa !11
   %2 = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #0
   %3 = icmp eq i32 %2, 0
   br i1 %3, label %region.guarded, label %region.barrier
 
 region.guarded:
-  store double %1, double* %X, align 8, !tbaa !11
+  store double %1, ptr %X, align 8, !tbaa !11
   br label %region.barrier
 
 region.barrier:
-  tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @1, i32 %2)
-  tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2) #0
+  tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @1, i32 %2)
+  tail call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #0
   br label %common.ret
 }
 
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr
+declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
+declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
 
 define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
 entry:
   %call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1
   %call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1
   %div = fdiv double %call.i, %call.i2
-  store double %div, double* @_ZL6Device, align 8, !tbaa !11
+  store double %div, ptr @_ZL6Device, align 8, !tbaa !11
   ret void
 }
 
@@ -50,7 +50,7 @@ declare double @__nv_log(double)
 
 declare i32 @__kmpc_get_hardware_thread_id_in_block()
 
-declare void @__kmpc_barrier_simple_spmd(%struct.ident_t*, i32)
+declare void @__kmpc_barrier_simple_spmd(ptr, i32)
 
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
@@ -63,8 +63,8 @@ attributes #1 = { convergent nounwind }
 !0 = !{i32 0, i32 64770, i32 -2060960764, !"__omp_offloading__fd02_85283c04_Device_l6_ctor", i32 6, i32 1}
 !1 = !{i32 0, i32 64770, i32 -2060960764, !"main", i32 11, i32 2}
 !2 = !{i32 1, !"_ZL6Device", i32 0, i32 0}
-!3 = !{void ()* @__omp_offloading__fd02_85283c04_Device_l6_ctor, !"kernel", i32 1}
-!4 = !{void (double*)* @__omp_offloading_fd02_85283c04_main_l11, !"kernel", i32 1}
+!3 = !{ptr @__omp_offloading__fd02_85283c04_Device_l6_ctor, !"kernel", i32 1}
+!4 = !{ptr @__omp_offloading_fd02_85283c04_main_l11, !"kernel", i32 1}
 !5 = !{i32 1, !"wchar_size", i32 4}
 !6 = !{i32 7, !"openmp", i32 50}
 !7 = !{i32 7, !"openmp-device", i32 50}
@@ -76,24 +76,24 @@ attributes #1 = { convergent nounwind }
 !13 = !{!"omnipotent char", !14, i64 0}
 !14 = !{!"Simple C++ TBAA"}
 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
-; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
+; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; CHECK:       common.ret:
 ; CHECK-NEXT:    ret void
 ; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; CHECK:       region.guarded:
-; CHECK-NEXT:    store double [[TMP1]], double* [[X]], align 8, !tbaa [[TBAA11]]
+; CHECK-NEXT:    store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]]
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
-; CHECK-NEXT:    tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR2]]
+; CHECK-NEXT:    tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
+; CHECK-NEXT:    tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR2]]
 ; CHECK-NEXT:    br label [[COMMON_RET]]
 ;
 ;
@@ -104,6 +104,6 @@ attributes #1 = { convergent nounwind }
 ; CHECK-NEXT:    [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]]
 ; CHECK-NEXT:    [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]]
 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
-; CHECK-NEXT:    store double [[DIV]], double* @_ZL6Device, align 8, !tbaa [[TBAA11]]
+; CHECK-NEXT:    store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]]
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
index 854cf85b551b1c..701c09ff05c1e3 100644
--- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
@@ -7,35 +7,33 @@ target triple = "nvptx64"
 ; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
 ; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
-@S = external local_unnamed_addr global i8*
+@S = external local_unnamed_addr global ptr
 
 define void @foo() {
 entry:
-  %c = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true)
-  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
-  %x_on_stack = bitcast i8* %0 to i32*
-  %1 = bitcast i32* %x_on_stack to i8*
-  call void @share(i8* %1), !dbg !10
-  call void @__kmpc_free_shared(i8* %0)
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false)
+  %c = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true)
+  %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10
+  call void @share(ptr %0), !dbg !10
+  call void @__kmpc_free_shared(ptr %0)
+  call void @__kmpc_target_deinit(ptr null, i1 false)
   ret void
 }
 
-define internal void @share(i8* %x) {
+define internal void @share(ptr %x) {
 entry:
-  store i8* %x, i8** @S
+  store ptr %x, ptr @S
   ret void
 }
 
-declare i8* @__kmpc_alloc_shared(i64)
+declare ptr @__kmpc_alloc_shared(i64)
 
-declare void @__kmpc_free_shared(i8* nocapture)
+declare void @__kmpc_free_shared(ptr nocapture)
 
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1);
+declare i32 @__kmpc_target_init(ptr, i1, i1);
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1)
+declare void @__kmpc_target_deinit(ptr, i1)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5, !6}
@@ -48,7 +46,7 @@ declare void @__kmpc_target_deinit(%struct.ident_t*, i1)
 !4 = !{i32 1, !"wchar_size", i32 4}
 !5 = !{i32 7, !"openmp", i32 50}
 !6 = !{i32 7, !"openmp-device", i32 50}
-!7 = !{void ()* @foo, !"kernel", i32 1}
+!7 = !{ptr @foo, !"kernel", i32 1}
 !8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
 !9 = !DISubroutineType(types: !2)
 !10 = !DILocation(line: 5, column: 7, scope: !8)
diff --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
index aa578a2bd71179..5efce91387e9eb 100644
--- a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll
@@ -16,13 +16,13 @@ define void @non_kernel() {
 }
 
 ; Needed to trigger the openmp-opt pass
-declare dso_local void @__kmpc_kernel_prepare_parallel(i8*)
+declare dso_local void @__kmpc_kernel_prepare_parallel(ptr)
 
 !llvm.module.flags = !{!4}
 !nvvm.annotations = !{!2, !0, !1, !3, !1, !2}
 
-!0 = !{void ()* @kernel1, !"kernel", i32 1}
-!1 = !{void ()* @non_kernel, !"non_kernel", i32 1}
+!0 = !{ptr @kernel1, !"kernel", i32 1}
+!1 = !{ptr @non_kernel, !"non_kernel", i32 1}
 !2 = !{null, !"align", i32 1}
-!3 = !{void ()* @kernel2, !"kernel", i32 1}
+!3 = !{ptr @kernel2, !"kernel", i32 1}
 !4 = !{i32 7, !"openmp", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
index 4bedf541ca7968..29f969d23e1ff6 100644
--- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
+++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
@@ -26,74 +26,72 @@
 ; CHECK-DAG: @__omp_outlined__1_wrapper.ID = private constant i8 undef
 ; CHECK-DAG: @__omp_outlined__2_wrapper.ID = private constant i8 undef
 
-; CHECK-DAG:   icmp eq void (i16, i32)* %worker.work_fn.addr_cast, bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*)
-; CHECK-DAG:   icmp eq void (i16, i32)* %worker.work_fn.addr_cast, bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*)
+; CHECK-DAG:   icmp eq ptr %worker.work_fn.addr_cast, @__omp_outlined__1_wrapper.ID
+; CHECK-DAG:   icmp eq ptr %worker.work_fn.addr_cast, @__omp_outlined__2_wrapper.ID
 
 
-; CHECK-DAG:   call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** %{{.*}}, i64 0)
-; CHECK-DAG:   call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** %{{.*}}, i64 0)
-; CHECK-DAG:   call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %{{.*}}, i64 0)
+; CHECK-DAG:   call void @__kmpc_parallel_51(ptr @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr %{{.*}}, i64 0)
+; CHECK-DAG:   call void @__kmpc_parallel_51(ptr @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr %{{.*}}, i64 0)
+; CHECK-DAG:   call void @__kmpc_parallel_51(ptr @2, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %{{.*}}, i64 0)
 
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @__omp_offloading_10301_87b2c_foo_l7_exec_mode = weak constant i8 1
-@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
-@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_10301_87b2c_foo_l7_exec_mode], section "llvm.metadata"
+@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_10301_87b2c_foo_l7_exec_mode], section "llvm.metadata"
 
 define weak void @__omp_offloading_10301_87b2c_foo_l7() {
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  store i32 0, ptr %.zero.addr, align 4
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr)
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 %1, ptr %.threadid_temp., align 4
+  call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr)
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
   ret i32 0
 }
 
 declare void @unknown()
 
-define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %2, i64 0)
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %captured_vars_addrs1 = alloca [0 x ptr], align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+  %0 = load ptr, ptr %.global_tid..addr, align 8
+  %1 = load i32, ptr %0, align 4
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0)
   call void @bar()
   call void @unknown()
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %3, i64 0)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs1, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   ret void
 }
 
@@ -102,34 +100,33 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr)
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr)
   ret void
 }
 
-declare void @__kmpc_get_shared_variables(i8***)
+declare void @__kmpc_get_shared_variables(ptr)
 
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
 
 define hidden void @bar() {
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %1, i64 0)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0)
   ret void
 }
 
-define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   ret void
 }
 
@@ -138,25 +135,25 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr)
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr)
   ret void
 }
 
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+declare i32 @__kmpc_global_thread_num(ptr)
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
-define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   ret void
 }
 
@@ -165,12 +162,12 @@ entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr)
+  %global_args = alloca ptr, align 8
+  store i32 0, ptr %.zero.addr, align 4
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr)
   ret void
 }
 
@@ -179,6 +176,6 @@ entry:
 !llvm.module.flags = !{!2, !3}
 
 !0 = !{i32 0, i32 66305, i32 555956, !"foo", i32 7, i32 0}
-!1 = !{void ()* @__omp_offloading_10301_87b2c_foo_l7, !"kernel", i32 1}
+!1 = !{ptr @__omp_offloading_10301_87b2c_foo_l7, !"kernel", i32 1}
 !2 = !{i32 7, !"openmp", i32 50}
 !3 = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/icv_remarks.ll b/llvm/test/Transforms/OpenMP/icv_remarks.ll
index f69f2abe5cc0c9..f76d4876a55dec 100644
--- a/llvm/test/Transforms/OpenMP/icv_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/icv_remarks.ll
@@ -5,10 +5,10 @@ source_filename = "icv_remarks.c"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8
 @1 = private unnamed_addr constant [26 x i8] c";icv_remarks.c;foo;18;1;;\00", align 1
 
 ; CHECK-DAG: remark: icv_remarks.c:12:0: OpenMP ICV nthreads Value: IMPLEMENTATION_DEFINED
@@ -17,16 +17,15 @@ target triple = "x86_64-unknown-linux-gnu"
 define dso_local void @foo(i32 %a) local_unnamed_addr #0 !dbg !17 {
 entry:
   %.kmpc_loc.addr = alloca %struct.ident_t, align 8
-  %0 = bitcast %struct.ident_t* %.kmpc_loc.addr to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(24) %0, i8* nonnull align 8 dereferenceable(24) bitcast (%struct.ident_t* @0 to i8*), i64 16, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(24) %.kmpc_loc.addr, ptr nonnull align 8 dereferenceable(24) @0, i64 16, i1 false)
   call void @llvm.dbg.value(metadata i32 %a, metadata !19, metadata !DIExpression()), !dbg !21
   tail call void @omp_set_num_threads(i32 %a) #1, !dbg !22
   %call = tail call i32 @omp_get_max_threads() #1, !dbg !23
   call void @llvm.dbg.value(metadata i32 %call, metadata !20, metadata !DIExpression()), !dbg !21
   tail call void @use(i32 %call) #1, !dbg !24
-  %1 = getelementptr inbounds %struct.ident_t, %struct.ident_t* %.kmpc_loc.addr, i64 0, i32 4, !dbg !25
-  store i8* getelementptr inbounds ([26 x i8], [26 x i8]* @1, i64 0, i64 0), i8** %1, align 8, !dbg !25, !tbaa !26
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull %.kmpc_loc.addr, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) #1, !dbg !25
+  %0 = getelementptr inbounds %struct.ident_t, ptr %.kmpc_loc.addr, i64 0, i32 4, !dbg !25
+  store ptr @1, ptr %0, align 8, !dbg !25, !tbaa !26
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull %.kmpc_loc.addr, i32 0, ptr @.omp_outlined.) #1, !dbg !25
   ret void, !dbg !32
 }
 
@@ -39,12 +38,12 @@ declare !dbg !12 dso_local void @use(i32) local_unnamed_addr #2
 ; CHECK-DAG: remark: icv_remarks.c:18:0: OpenMP ICV nthreads Value: IMPLEMENTATION_DEFINED
 ; CHECK-DAG: remark: icv_remarks.c:18:0: OpenMP ICV active_levels Value: 0
 ; CHECK-DAG: remark: icv_remarks.c:18:0: OpenMP ICV cancel Value: 0
-define internal void @.omp_outlined.(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid.) #3 !dbg !33 {
+define internal void @.omp_outlined.(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid.) #3 !dbg !33 {
 entry:
-  call void @llvm.dbg.value(metadata i32* %.global_tid., metadata !41, metadata !DIExpression()), !dbg !43
-  call void @llvm.dbg.value(metadata i32* %.bound_tid., metadata !42, metadata !DIExpression()), !dbg !43
-  call void @llvm.dbg.value(metadata i32* undef, metadata !44, metadata !DIExpression()) #1, !dbg !50
-  call void @llvm.dbg.value(metadata i32* undef, metadata !47, metadata !DIExpression()) #1, !dbg !50
+  call void @llvm.dbg.value(metadata ptr %.global_tid., metadata !41, metadata !DIExpression()), !dbg !43
+  call void @llvm.dbg.value(metadata ptr %.bound_tid., metadata !42, metadata !DIExpression()), !dbg !43
+  call void @llvm.dbg.value(metadata ptr undef, metadata !44, metadata !DIExpression()) #1, !dbg !50
+  call void @llvm.dbg.value(metadata ptr undef, metadata !47, metadata !DIExpression()) #1, !dbg !50
   tail call void @omp_set_num_threads(i32 10) #1, !dbg !52
   %call.i = tail call i32 @omp_get_max_threads() #1, !dbg !53
   call void @llvm.dbg.value(metadata i32 %call.i, metadata !48, metadata !DIExpression()) #1, !dbg !54
@@ -52,9 +51,9 @@ entry:
   ret void, !dbg !56
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #4
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #4
 
-declare !callback !57 dso_local void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr #1
+declare !callback !57 dso_local void @__kmpc_fork_call(ptr, i32, ptr, ...) local_unnamed_addr #1
 
 declare void @llvm.dbg.value(metadata, metadata, metadata) #5
 
diff --git a/llvm/test/Transforms/OpenMP/icv_tracking.ll b/llvm/test/Transforms/OpenMP/icv_tracking.ll
index c51071742327e0..57c8eaee9cb64e 100644
--- a/llvm/test/Transforms/OpenMP/icv_tracking.ll
+++ b/llvm/test/Transforms/OpenMP/icv_tracking.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8
 
 ; doesn't modify any ICVs.
 define i32 @icv_free_use(i32 %0) {
@@ -45,9 +45,9 @@ define i32 @ok_use_assume(i32 %0) {
   ret i32 %2
 }
 
-define void @indirect_call(void ()* %0) {
+define void @indirect_call(ptr %0) {
 ; CHECK-LABEL: define {{[^@]+}}@indirect_call
-; CHECK-SAME: (void ()* [[TMP0:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]]) {
 ; CHECK-NEXT:    call void @omp_set_num_threads(i32 4)
 ; CHECK-NEXT:    tail call void [[TMP0]]()
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @omp_get_max_threads()
@@ -68,7 +68,7 @@ define dso_local i32 @foo(i32 %0, i32 %1) {
 ; CHECK-NEXT:    tail call void @omp_set_num_threads(i32 [[TMP1]])
 ; CHECK-NEXT:    tail call void @use(i32 [[TMP1]])
 ; CHECK-NEXT:    tail call void @use(i32 [[TMP1]])
-; CHECK-NEXT:    tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @[[GLOB0:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+; CHECK-NEXT:    tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB0:[0-9]+]], i32 0, ptr @.omp_outlined.)
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @omp_get_max_threads()
 ; CHECK-NEXT:    tail call void @use(i32 [[TMP3]])
 ; CHECK-NEXT:    ret i32 0
@@ -82,7 +82,7 @@ define dso_local i32 @foo(i32 %0, i32 %1) {
   %6 = tail call i32 @omp_get_max_threads()
   tail call void @use(i32 %4)
   tail call void @use(i32 %5)
-  tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+  tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.)
   %7 = tail call i32 @omp_get_max_threads()
   tail call void @use(i32 %7)
   ret i32 0
@@ -95,9 +95,9 @@ declare dso_local i32 @omp_get_max_threads()
 declare dso_local void @use(i32)
 declare dso_local void @no_openmp_use(i32) "no_openmp"
 
-define internal void @.omp_outlined.(i32* %0, i32* %1) {
+define internal void @.omp_outlined.(ptr %0, ptr %1) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @omp_get_max_threads()
 ; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @omp_get_max_threads()
 ; CHECK-NEXT:    tail call void @use(i32 [[TMP4]])
@@ -115,7 +115,7 @@ define internal void @.omp_outlined.(i32* %0, i32* %1) {
   ret void
 }
 
-declare !callback !0 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+declare !callback !0 void @__kmpc_fork_call(ptr, i32, ptr, ...)
 
 define dso_local i32 @bar(i32 %0, i32 %1) {
 ; CHECK-LABEL: define {{[^@]+}}@bar
@@ -123,7 +123,7 @@ define dso_local i32 @bar(i32 %0, i32 %1) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP0]], i32 [[TMP1]]
 ; CHECK-NEXT:    tail call void @omp_set_num_threads(i32 [[TMP4]])
-; CHECK-NEXT:    tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @[[GLOB0]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+; CHECK-NEXT:    tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB0]], i32 0, ptr @.omp_outlined..1)
 ; CHECK-NEXT:    [[TMP5:%.*]] = tail call i32 @omp_get_max_threads()
 ; CHECK-NEXT:    tail call void @use(i32 [[TMP5]])
 ; CHECK-NEXT:    ret i32 0
@@ -133,15 +133,15 @@ define dso_local i32 @bar(i32 %0, i32 %1) {
 ; FIXME: getters can be replaced with %4
   tail call void @omp_set_num_threads(i32 %4)
   %5 = tail call i32 @omp_get_max_threads()
-  tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+  tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..1)
   %6 = tail call i32 @omp_get_max_threads()
   tail call void @use(i32 %6)
   ret i32 0
 }
 
-define internal void @.omp_outlined..1(i32* %0, i32*  %1) {
+define internal void @.omp_outlined..1(ptr %0, ptr  %1) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @omp_get_max_threads()
 ; CHECK-NEXT:    tail call void @use(i32 [[TMP3]])
 ; CHECK-NEXT:    tail call void @omp_set_num_threads(i32 10)
@@ -166,7 +166,7 @@ define dso_local i32 @bar1(i32 %0, i32 %1) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP0]], i32 [[TMP1]]
 ; CHECK-NEXT:    tail call void @omp_set_num_threads(i32 [[TMP4]])
-; CHECK-NEXT:    tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @[[GLOB0]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
+; CHECK-NEXT:    tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB0]], i32 0, ptr @.omp_outlined..2)
 ; CHECK-NEXT:    [[TMP5:%.*]] = tail call i32 @omp_get_max_threads()
 ; CHECK-NEXT:    tail call void @use(i32 [[TMP5]])
 ; CHECK-NEXT:    ret i32 0
@@ -175,15 +175,15 @@ define dso_local i32 @bar1(i32 %0, i32 %1) {
   %4 = select i1 %3, i32 %0, i32 %1
   tail call void @omp_set_num_threads(i32 %4)
   %5 = tail call i32 @omp_get_max_threads()
-  tail call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
+  tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..2)
   %6 = tail call i32 @omp_get_max_threads()
   tail call void @use(i32 %6)
   ret i32 0
 }
 
-define internal void @.omp_outlined..2(i32* %0, i32*  %1) {
+define internal void @.omp_outlined..2(ptr %0, ptr  %1) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @omp_get_max_threads()
 ; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @icv_free_use(i32 [[TMP3]])
 ; CHECK-NEXT:    tail call void @omp_set_num_threads(i32 10)
@@ -500,9 +500,9 @@ define void @test4(i1 %0) {
   ret void
 }
 
-define void @test4_invoke(i1 %0) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @test4_invoke(i1 %0) personality ptr @__gxx_personality_v0 {
 ; CHECK-LABEL: define {{[^@]+}}@test4_invoke
-; CHECK-SAME: (i1 [[TMP0:%.*]]) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-SAME: (i1 [[TMP0:%.*]]) personality ptr @__gxx_personality_v0 {
 ; CHECK-NEXT:    call void @known_unique_icv(i1 [[TMP0]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = invoke i32 @maybe_throw(i1 zeroext [[TMP0]])
 ; CHECK-NEXT:    to label [[CONT:%.*]] unwind label [[EXC:%.*]]
@@ -510,8 +510,8 @@ define void @test4_invoke(i1 %0) personality i8* bitcast (i32 (...)* @__gxx_pers
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i1 [[TMP0]], false
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]]
 ; CHECK:       exc:
-; CHECK-NEXT:    [[LP:%.*]] = landingpad { i8*, i32 }
-; CHECK-NEXT:    filter [0 x i8*] zeroinitializer
+; CHECK-NEXT:    [[LP:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:    filter [0 x ptr] zeroinitializer
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[VAL:%.*]] = call i32 @icv_free_use(i32 10)
@@ -529,8 +529,8 @@ cont:
   br i1 %3, label %5, label %4
 
 exc:
-  %lp = landingpad { i8*, i32 }
-  filter [0 x i8*] zeroinitializer
+  %lp = landingpad { ptr, i32 }
+  filter [0 x ptr] zeroinitializer
   unreachable
 
 4:                                                ; preds = %1
diff --git a/llvm/test/Transforms/OpenMP/icv_tracking_out_of_scope.ll b/llvm/test/Transforms/OpenMP/icv_tracking_out_of_scope.ll
index b9889f9ba30173..ccdf492c303170 100644
--- a/llvm/test/Transforms/OpenMP/icv_tracking_out_of_scope.ll
+++ b/llvm/test/Transforms/OpenMP/icv_tracking_out_of_scope.ll
@@ -8,12 +8,12 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 define void @set() {
 ; CHECK-LABEL: define {{[^@]+}}@set() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @inofth, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @inofth, align 4
 ; CHECK-NEXT:    call void @omp_set_num_threads(i32 noundef [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* @inofth, align 4
+  %0 = load i32, ptr @inofth, align 4
   call void @omp_set_num_threads(i32 noundef %0)
   ret void
 }
diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
index e71d99486751ed..3502ef79eaba27 100644
--- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
+++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
@@ -2,14 +2,14 @@
 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
 target triple = "nvptx64"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @is_spmd_exec_mode = weak constant i8 0
 @will_be_spmd_exec_mode = weak constant i8 1
 @non_spmd_exec_mode = weak constant i8 1
 @will_not_be_spmd_exec_mode = weak constant i8 1
 @G = external global i8
-@llvm.compiler.used = appending global [4 x i8*] [i8* @is_spmd_exec_mode, i8* @will_be_spmd_exec_mode, i8* @non_spmd_exec_mode, i8* @will_not_be_spmd_exec_mode ], section "llvm.metadata"
+@llvm.compiler.used = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode ], section "llvm.metadata"
 
 ;.
 ; CHECK: @[[IS_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
@@ -17,45 +17,44 @@ target triple = "nvptx64"
 ; CHECK: @[[NON_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; CHECK: @[[WILL_NOT_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [4 x i8*] [i8* @is_spmd_exec_mode, i8* @will_be_spmd_exec_mode, i8* @non_spmd_exec_mode, i8* @will_not_be_spmd_exec_mode], section "llvm.metadata"
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode], section "llvm.metadata"
 ;.
 define weak void @is_spmd() {
 ; CHECK-LABEL: define {{[^@]+}}@is_spmd() {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
 ; CHECK-NEXT:    call void @is_spmd_helper1()
 ; CHECK-NEXT:    call void @is_spmd_helper2()
 ; CHECK-NEXT:    call void @is_mixed_helper()
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 2)
 ; CHECK-NEXT:    ret void
 ;
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
   call void @is_spmd_helper1()
   call void @is_spmd_helper2()
   call void @is_mixed_helper()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+  call void @__kmpc_target_deinit(ptr null, i8 2)
   ret void
 }
 
 define weak void @will_be_spmd() {
 ; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; CHECK:       common.ret:
 ; CHECK-NEXT:    ret void
 ; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    call void @is_spmd_helper2()
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0)
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 2)
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %i, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -63,55 +62,54 @@ common.ret:
   ret void
 
 user_code.entry:
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
+  %0 = call i32 @__kmpc_global_thread_num(ptr null)
   call void @is_spmd_helper2()
-  call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0)
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+  call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
+  call void @__kmpc_target_deinit(ptr null, i8 1)
   ret void
 }
 
 define weak void @non_spmd() {
 ; CHECK-LABEL: define {{[^@]+}}@non_spmd() {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
 ; CHECK-NEXT:    call void @is_generic_helper1()
 ; CHECK-NEXT:    call void @is_generic_helper2()
 ; CHECK-NEXT:    call void @is_mixed_helper()
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 1)
 ; CHECK-NEXT:    ret void
 ;
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
   call void @is_generic_helper1()
   call void @is_generic_helper2()
   call void @is_mixed_helper()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+  call void @__kmpc_target_deinit(ptr null, i8 1)
   ret void
 }
 
 define weak void @will_not_be_spmd() {
 ; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
 ; CHECK-NEXT:    call void @is_generic_helper1()
 ; CHECK-NEXT:    call void @is_generic_helper2()
 ; CHECK-NEXT:    call void @is_mixed_helper()
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 1)
 ; CHECK-NEXT:    ret void
 ;
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
   call void @is_generic_helper1()
   call void @is_generic_helper2()
   call void @is_mixed_helper()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+  call void @__kmpc_target_deinit(ptr null, i8 1)
   ret void
 }
 
 define internal void @is_spmd_helper1() {
 ; CHECK-LABEL: define {{[^@]+}}@is_spmd_helper1() {
-; CHECK-NEXT:    store i8 1, i8* @G, align 1
+; CHECK-NEXT:    store i8 1, ptr @G, align 1
 ; CHECK-NEXT:    ret void
 ;
   %isSPMD = call i8 @__kmpc_is_spmd_exec_mode()
-  store i8 %isSPMD, i8* @G
+  store i8 %isSPMD, ptr @G
   ret void
 }
 
@@ -135,11 +133,11 @@ f:
 
 define internal void @is_generic_helper1() {
 ; CHECK-LABEL: define {{[^@]+}}@is_generic_helper1() {
-; CHECK-NEXT:    store i8 0, i8* @G, align 1
+; CHECK-NEXT:    store i8 0, ptr @G, align 1
 ; CHECK-NEXT:    ret void
 ;
   %isSPMD = call i8 @__kmpc_is_spmd_exec_mode()
-  store i8 %isSPMD, i8* @G
+  store i8 %isSPMD, ptr @G
   ret void
 }
 
@@ -168,17 +166,17 @@ f:
 define internal void @is_mixed_helper() {
 ; CHECK-LABEL: define {{[^@]+}}@is_mixed_helper() {
 ; CHECK-NEXT:    [[ISSPMD:%.*]] = call i8 @__kmpc_is_spmd_exec_mode()
-; CHECK-NEXT:    store i8 [[ISSPMD]], i8* @G, align 1
+; CHECK-NEXT:    store i8 [[ISSPMD]], ptr @G, align 1
 ; CHECK-NEXT:    ret void
 ;
   %isSPMD = call i8 @__kmpc_is_spmd_exec_mode()
-  store i8 %isSPMD, i8* @G
+  store i8 %isSPMD, ptr @G
   ret void
 }
 
-define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
-; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) {
+; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret void
 ;
@@ -198,10 +196,10 @@ entry:
 
 declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable"
 declare i8 @__kmpc_is_spmd_exec_mode()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext)
-declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8)
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext)
+declare void @__kmpc_target_deinit(ptr nocapture readnone, i8)
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
+declare i32 @__kmpc_global_thread_num(ptr)
 declare void @foo()
 declare void @bar()
 
@@ -210,10 +208,10 @@ declare void @bar()
 
 !0 = !{i32 7, !"openmp", i32 50}
 !1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{void ()* @is_spmd, !"kernel", i32 1}
-!3 = !{void ()* @will_be_spmd, !"kernel", i32 1}
-!4 = !{void ()* @non_spmd, !"kernel", i32 1}
-!5 = !{void ()* @will_not_be_spmd, !"kernel", i32 1}
+!2 = !{ptr @is_spmd, !"kernel", i32 1}
+!3 = !{ptr @will_be_spmd, !"kernel", i32 1}
+!4 = !{ptr @non_spmd, !"kernel", i32 1}
+!5 = !{ptr @will_not_be_spmd, !"kernel", i32 1}
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_spmd_amenable" }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline }
@@ -221,8 +219,8 @@ declare void @bar()
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{void ()* @is_spmd, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{void ()* @will_be_spmd, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{void ()* @non_spmd, !"kernel", i32 1}
-; CHECK: [[META5:![0-9]+]] = !{void ()* @will_not_be_spmd, !"kernel", i32 1}
+; CHECK: [[META2:![0-9]+]] = !{ptr @is_spmd, !"kernel", i32 1}
+; CHECK: [[META3:![0-9]+]] = !{ptr @will_be_spmd, !"kernel", i32 1}
+; CHECK: [[META4:![0-9]+]] = !{ptr @non_spmd, !"kernel", i32 1}
+; CHECK: [[META5:![0-9]+]] = !{ptr @will_not_be_spmd, !"kernel", i32 1}
 ;.
diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll
index 908224082097de..65c4694384384c 100644
--- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll
@@ -3,13 +3,13 @@
 ;
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
-@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8
+@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, ptr @.str }, align 8
 @.gomp_critical_user_.reduction.var = common global [8 x i32] zeroinitializer
-@2 = private unnamed_addr global %struct.ident_t { i32 0, i32 18, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@2 = private unnamed_addr global %struct.ident_t { i32 0, i32 18, i32 0, i32 0, ptr @.str }, align 8
 
 ;    void delete_parallel_0(void) {
 ;    #pragma omp parallel
@@ -26,39 +26,39 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 define void @delete_parallel_0() {
 ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_0() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*))
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, ptr noundef @.omp_outlined.willreturn)
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_0() {
 ; CHECK1-NEXT:  entry:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*))
+; CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, ptr noundef @.omp_outlined.willreturn)
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_0() {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*))
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, ptr noundef @.omp_outlined.willreturn)
 ; CHECK2-NEXT:    ret void
 entry:
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*))
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn.0 to void (i32*, i32*, ...)*))
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn.1 to void (i32*, i32*, ...)*))
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn.2 to void (i32*, i32*, ...)*))
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.willreturn)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.willreturn.0)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.willreturn.1)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.willreturn.2)
   ret void
 }
 
-define internal void @.omp_outlined.willreturn(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @.omp_outlined.willreturn(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @unknown() #[[ATTR0]]
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn
-; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    call void @unknown() #[[ATTR0]]
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    call void @unknown() #[[ATTR0]]
 ; CHECK2-NEXT:    ret void
@@ -67,20 +67,20 @@ entry:
   ret void
 }
 
-define internal void @.omp_outlined.willreturn.0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) willreturn {
+define internal void @.omp_outlined.willreturn.0(ptr noalias %.global_tid., ptr noalias %.bound_tid.) willreturn {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @readonly()
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0
-; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    call void @readonly() #[[ATTR4:[0-9]+]]
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    call void @readonly() #[[ATTR4:[0-9]+]]
 ; CHECK2-NEXT:    ret void
@@ -89,20 +89,20 @@ entry:
   ret void
 }
 
-define internal void @.omp_outlined.willreturn.1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @.omp_outlined.willreturn.1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @readnone() #[[ATTR0]]
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1
-; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    call void @readnone() #[[ATTR0]]
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    call void @readnone() #[[ATTR0]]
 ; CHECK2-NEXT:    ret void
@@ -111,18 +111,18 @@ entry:
   ret void
 }
 
-define internal void @.omp_outlined.willreturn.2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @.omp_outlined.willreturn.2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2
-; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    ret void
 entry:
@@ -144,45 +144,45 @@ entry:
 define void @delete_parallel_1() {
 ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_1() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined.)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..0)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..1)
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_1() {
 ; CHECK1-NEXT:  entry:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+; CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined.)
+; CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..0)
+; CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..1)
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_1() {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined.)
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..0)
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, ptr noundef @.omp_outlined..1)
 ; CHECK2-NEXT:    ret void
 entry:
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..0)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..1)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..2)
   ret void
 }
 
-define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @unknown()
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) {
+; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    call void @unknown()
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    call void @unknown()
 ; CHECK2-NEXT:    ret void
@@ -191,20 +191,20 @@ entry:
   ret void
 }
 
-define internal void @.omp_outlined..0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @.omp_outlined..0(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..0
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @readonly()
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..0
-; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
+; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    call void @readonly() #[[ATTR4]]
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..0
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    call void @readonly() #[[ATTR4]]
 ; CHECK2-NEXT:    ret void
@@ -213,20 +213,20 @@ entry:
   ret void
 }
 
-define internal void @.omp_outlined..1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @.omp_outlined..1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @readnone()
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    call void @readnone()
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    call void @readnone()
 ; CHECK2-NEXT:    ret void
@@ -235,18 +235,18 @@ entry:
   ret void
 }
 
-define internal void @.omp_outlined..2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @.omp_outlined..2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
+; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    ret void
 entry:
@@ -282,95 +282,87 @@ define void @delete_parallel_2() {
 ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_2() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP:%.*]] = bitcast i32* [[A]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]]
-; CHECK-NEXT:    store i32 0, i32* [[A]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR0]]
+; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A]])
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_2() {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP:%.*]] = bitcast i32* [[A]] to i8*
-; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]]
-; CHECK1-NEXT:    store i32 0, i32* [[A]], align 4
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to i8*
-; CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]])
+; CHECK1-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR0]]
+; CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
+; CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK1-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A]])
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_2() {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP:%.*]] = bitcast i32* [[A]] to i8*
-; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]]
-; CHECK2-NEXT:    store i32 0, i32* [[A]], align 4
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to i8*
-; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]])
+; CHECK2-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR0]]
+; CHECK2-NEXT:    store i32 0, ptr [[A]], align 4
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK2-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A]])
 ; CHECK2-NEXT:    ret void
 entry:
   %a = alloca i32, align 4
-  %tmp = bitcast i32* %a to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp)
-  store i32 0, i32* %a, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nonnull %a)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nonnull %a)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nonnull %a)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nonnull %a)
-  %tmp1 = bitcast i32* %a to i8*
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %a)
+  store i32 0, ptr %a, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 1, ptr @.omp_outlined..3, ptr nonnull %a)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 1, ptr @.omp_outlined..4, ptr nonnull %a)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 1, ptr @.omp_outlined..5, ptr nonnull %a)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 1, ptr @.omp_outlined..6, ptr nonnull %a)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %a)
   ret void
 }
 
-define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) {
+define internal void @.omp_outlined..3(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr dereferenceable(4) %a) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9:[0-9]+]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP]], 1
-; CHECK-NEXT:    store i32 [[INC]], i32* [[A]], align 4
+; CHECK-NEXT:    store i32 [[INC]], ptr [[A]], align 4
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
+; CHECK1-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]]
 ; CHECK1-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; CHECK1-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK1:       if.then:
-; CHECK1-NEXT:    [[TMP:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    [[TMP:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP]], 1
-; CHECK1-NEXT:    store i32 [[INC]], i32* [[A]], align 4
+; CHECK1-NEXT:    store i32 [[INC]], ptr [[A]], align 4
 ; CHECK1-NEXT:    br label [[IF_END]]
 ; CHECK1:       if.end:
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]]
 ; CHECK2-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; CHECK2-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK2:       if.then:
-; CHECK2-NEXT:    [[TMP:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP]], 1
-; CHECK2-NEXT:    store i32 [[INC]], i32* [[A]], align 4
+; CHECK2-NEXT:    store i32 [[INC]], ptr [[A]], align 4
 ; CHECK2-NEXT:    br label [[IF_END]]
 ; CHECK2:       if.end:
 ; CHECK2-NEXT:    ret void
@@ -380,357 +372,333 @@ entry:
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %tmp = load i32, i32* %a, align 4
+  %tmp = load i32, ptr %a, align 4
   %inc = add nsw i32 %tmp, 1
-  store i32 %inc, i32* %a, align 4
+  store i32 %inc, ptr %a, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
   ret void
 }
 
-define internal void @.omp_outlined..4(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) {
+define internal void @.omp_outlined..4(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr dereferenceable(4) %a) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
 ; CHECK:       omp_if.then:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[INC]], i32* [[A]], align 4
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK-NEXT:    store i32 [[INC]], ptr [[A]], align 4
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK-NEXT:    br label [[OMP_IF_END]]
 ; CHECK:       omp_if.end:
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK1-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK1-NEXT:  entry:
-; CHECK1-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK1-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK1-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
 ; CHECK1:       omp_if.then:
-; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK1-NEXT:    store i32 [[INC]], i32* [[A]], align 4
-; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK1-NEXT:    store i32 [[INC]], ptr [[A]], align 4
+; CHECK1-NEXT:    call void @__kmpc_end_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK1-NEXT:    br label [[OMP_IF_END]]
 ; CHECK1:       omp_if.end:
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK2-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK2-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK2-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
 ; CHECK2:       omp_if.then:
-; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK2-NEXT:    store i32 [[INC]], i32* [[A]], align 4
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK2-NEXT:    store i32 [[INC]], ptr [[A]], align 4
+; CHECK2-NEXT:    call void @__kmpc_end_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK2-NEXT:    br label [[OMP_IF_END]]
 ; CHECK2:       omp_if.end:
 ; CHECK2-NEXT:    ret void
 entry:
-  %tmp = load i32, i32* %.global_tid., align 4
-  %tmp1 = call i32 @__kmpc_master(%struct.ident_t* nonnull @0, i32 %tmp)
+  %tmp = load i32, ptr %.global_tid., align 4
+  %tmp1 = call i32 @__kmpc_master(ptr nonnull @0, i32 %tmp)
   %tmp2 = icmp eq i32 %tmp1, 0
   br i1 %tmp2, label %omp_if.end, label %omp_if.then
 
 omp_if.then:                                      ; preds = %entry
-  %tmp3 = load i32, i32* %a, align 4
+  %tmp3 = load i32, ptr %a, align 4
   %inc = add nsw i32 %tmp3, 1
-  store i32 %inc, i32* %a, align 4
-  call void @__kmpc_end_master(%struct.ident_t* nonnull @0, i32 %tmp)
+  store i32 %inc, ptr %a, align 4
+  call void @__kmpc_end_master(ptr nonnull @0, i32 %tmp)
   br label %omp_if.end
 
 omp_if.end:                                       ; preds = %entry, %omp_if.then
   ret void
 }
 
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
 
 declare i32 @omp_get_thread_num() inaccessiblememonly nofree nosync nounwind readonly
 
-declare i32 @__kmpc_master(%struct.ident_t*, i32)
+declare i32 @__kmpc_master(ptr, i32)
 
-declare void @__kmpc_end_master(%struct.ident_t*, i32)
+declare void @__kmpc_end_master(ptr, i32)
 
-define internal void @.omp_outlined..5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) {
+define internal void @.omp_outlined..5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr dereferenceable(4) %a) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR9]]
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @[[GLOB0]]) #[[ATTR9]]
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
 ; CHECK:       omp_if.then:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[INC]], i32* [[A]], align 4
-; CHECK-NEXT:    call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK-NEXT:    store i32 [[INC]], ptr [[A]], align 4
+; CHECK-NEXT:    call void @__kmpc_end_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK-NEXT:    br label [[OMP_IF_END]]
 ; CHECK:       omp_if.end:
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK1-NEXT:  entry:
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]]
-; CHECK1-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @[[GLOB0]]) #[[ATTR14]]
+; CHECK1-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK1-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
 ; CHECK1:       omp_if.then:
-; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK1-NEXT:    store i32 [[INC]], i32* [[A]], align 4
-; CHECK1-NEXT:    call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK1-NEXT:    store i32 [[INC]], ptr [[A]], align 4
+; CHECK1-NEXT:    call void @__kmpc_end_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK1-NEXT:    br label [[OMP_IF_END]]
 ; CHECK1:       omp_if.end:
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(ptr noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]]
-; CHECK2-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @[[GLOB0]]) #[[ATTR14]]
+; CHECK2-NEXT:    [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK2-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
 ; CHECK2:       omp_if.then:
-; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK2-NEXT:    store i32 [[INC]], i32* [[A]], align 4
-; CHECK2-NEXT:    call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK2-NEXT:    store i32 [[INC]], ptr [[A]], align 4
+; CHECK2-NEXT:    call void @__kmpc_end_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK2-NEXT:    br label [[OMP_IF_END]]
 ; CHECK2:       omp_if.end:
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    ret void
 entry:
-  %omp_global_thread_num = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
-  %tmp = load i32, i32* %.global_tid., align 4
-  %tmp1 = call i32 @__kmpc_single(%struct.ident_t* nonnull @0, i32 %tmp)
+  %omp_global_thread_num = call i32 @__kmpc_global_thread_num(ptr nonnull @0)
+  %tmp = load i32, ptr %.global_tid., align 4
+  %tmp1 = call i32 @__kmpc_single(ptr nonnull @0, i32 %tmp)
   %tmp2 = icmp eq i32 %tmp1, 0
   br i1 %tmp2, label %omp_if.end, label %omp_if.then
 
 omp_if.then:                                      ; preds = %entry
-  %tmp3 = load i32, i32* %a, align 4
+  %tmp3 = load i32, ptr %a, align 4
   %inc = add nsw i32 %tmp3, 1
-  store i32 %inc, i32* %a, align 4
-  call void @__kmpc_end_single(%struct.ident_t* nonnull @0, i32 %tmp)
+  store i32 %inc, ptr %a, align 4
+  call void @__kmpc_end_single(ptr nonnull @0, i32 %tmp)
   br label %omp_if.end
 
 omp_if.end:                                       ; preds = %entry, %omp_if.then
-  call void @__kmpc_barrier(%struct.ident_t* nonnull @1, i32 %omp_global_thread_num) #6
+  call void @__kmpc_barrier(ptr nonnull @1, i32 %omp_global_thread_num) #6
   ret void
 }
 
-define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) {
+define internal void @.omp_outlined..6(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr dereferenceable(4) %a) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-; CHECK-NEXT:    [[TMP:%.*]] = bitcast i32* [[A1]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR14:[0-9]+]]
-; CHECK-NEXT:    store i32 1, i32* [[A1]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32**
-; CHECK-NEXT:    store i32* [[A1]], i32** [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 [[A1]]) #[[ATTR14:[0-9]+]]
+; CHECK-NEXT:    store i32 1, ptr [[A1]], align 4
+; CHECK-NEXT:    store ptr [[A1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(ptr noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, ptr noundef nonnull align 8 [[DOTOMP_REDUCTION_RED_LIST]], ptr noundef nonnull @.omp.reduction.reduction_func, ptr noundef nonnull @.gomp_critical_user_.reduction.var)
 ; CHECK-NEXT:    switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 ; CHECK-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 ; CHECK-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       .omp.reduction.case1:
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A1]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
-; CHECK-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
+; CHECK-NEXT:    call void @__kmpc_end_reduce_nowait(ptr noundef nonnull @[[GLOB2]], i32 [[TMP2]], ptr noundef nonnull @.gomp_critical_user_.reduction.var)
 ; CHECK-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 ; CHECK:       .omp.reduction.case2:
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A1]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[A1]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP7]] monotonic, align 4
 ; CHECK-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 ; CHECK:       .omp.reduction.default:
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[A1]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A1]])
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK1-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    [[A1:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-; CHECK1-NEXT:    [[TMP:%.*]] = bitcast i32* [[A1]] to i8*
-; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]]
-; CHECK1-NEXT:    store i32 1, i32* [[A1]], align 4
-; CHECK1-NEXT:    [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32**
-; CHECK1-NEXT:    store i32* [[A1]], i32** [[TMP1]], align 8
-; CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK1-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
-; CHECK1-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
+; CHECK1-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 [[A1]]) #[[ATTR0]]
+; CHECK1-NEXT:    store i32 1, ptr [[A1]], align 4
+; CHECK1-NEXT:    store ptr [[A1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], align 8
+; CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(ptr noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, ptr noundef nonnull align 8 [[DOTOMP_REDUCTION_RED_LIST]], ptr noundef nonnull @.omp.reduction.reduction_func, ptr noundef nonnull @.gomp_critical_user_.reduction.var)
 ; CHECK1-NEXT:    switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 ; CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 ; CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 ; CHECK1-NEXT:    ]
 ; CHECK1:       .omp.reduction.case1:
-; CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A]], align 4
-; CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4
+; CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A]], align 4
+; CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A1]], align 4
 ; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-; CHECK1-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
-; CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK1-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
+; CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(ptr noundef nonnull @[[GLOB2]], i32 [[TMP2]], ptr noundef nonnull @.gomp_critical_user_.reduction.var)
 ; CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 ; CHECK1:       .omp.reduction.case2:
-; CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A1]], align 4
-; CHECK1-NEXT:    [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4
+; CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[A1]], align 4
+; CHECK1-NEXT:    [[TMP8:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP7]] monotonic, align 4
 ; CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 ; CHECK1:       .omp.reduction.default:
-; CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i32* [[A1]] to i8*
-; CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]])
+; CHECK1-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A1]])
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK2-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A1:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-; CHECK2-NEXT:    [[TMP:%.*]] = bitcast i32* [[A1]] to i8*
-; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]]
-; CHECK2-NEXT:    store i32 1, i32* [[A1]], align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32**
-; CHECK2-NEXT:    store i32* [[A1]], i32** [[TMP1]], align 8
-; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK2-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
-; CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
+; CHECK2-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 [[A1]]) #[[ATTR0]]
+; CHECK2-NEXT:    store i32 1, ptr [[A1]], align 4
+; CHECK2-NEXT:    store ptr [[A1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], align 8
+; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(ptr noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, ptr noundef nonnull align 8 [[DOTOMP_REDUCTION_RED_LIST]], ptr noundef nonnull @.omp.reduction.reduction_func, ptr noundef nonnull @.gomp_critical_user_.reduction.var)
 ; CHECK2-NEXT:    switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 ; CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 ; CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 ; CHECK2-NEXT:    ]
 ; CHECK2:       .omp.reduction.case1:
-; CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A]], align 4
-; CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4
+; CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A]], align 4
+; CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A1]], align 4
 ; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
-; CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK2-NEXT:    store i32 [[ADD]], ptr [[A]], align 4
+; CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(ptr noundef nonnull @[[GLOB2]], i32 [[TMP2]], ptr noundef nonnull @.gomp_critical_user_.reduction.var)
 ; CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 ; CHECK2:       .omp.reduction.case2:
-; CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A1]], align 4
-; CHECK2-NEXT:    [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4
+; CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[A1]], align 4
+; CHECK2-NEXT:    [[TMP8:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP7]] monotonic, align 4
 ; CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 ; CHECK2:       .omp.reduction.default:
-; CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i32* [[A1]] to i8*
-; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]])
+; CHECK2-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[A1]])
 ; CHECK2-NEXT:    ret void
 entry:
   %a1 = alloca i32, align 4
-  %.omp.reduction.red_list = alloca [1 x i8*], align 8
-  %tmp = bitcast i32* %a1 to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp)
-  store i32 1, i32* %a1, align 4
-  %tmp1 = bitcast [1 x i8*]* %.omp.reduction.red_list to i32**
-  store i32* %a1, i32** %tmp1, align 8
-  %tmp2 = load i32, i32* %.global_tid., align 4
-  %tmp3 = bitcast [1 x i8*]* %.omp.reduction.red_list to i8*
-  %tmp4 = call i32 @__kmpc_reduce_nowait(%struct.ident_t* nonnull @2, i32 %tmp2, i32 1, i64 8, i8* nonnull %tmp3, void (i8*, i8*)* nonnull @.omp.reduction.reduction_func, [8 x i32]* nonnull @.gomp_critical_user_.reduction.var)
+  %.omp.reduction.red_list = alloca [1 x ptr], align 8
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %a1)
+  store i32 1, ptr %a1, align 4
+  store ptr %a1, ptr %.omp.reduction.red_list, align 8
+  %tmp2 = load i32, ptr %.global_tid., align 4
+  %tmp4 = call i32 @__kmpc_reduce_nowait(ptr nonnull @2, i32 %tmp2, i32 1, i64 8, ptr nonnull %.omp.reduction.red_list, ptr nonnull @.omp.reduction.reduction_func, ptr nonnull @.gomp_critical_user_.reduction.var)
   switch i32 %tmp4, label %.omp.reduction.default [
   i32 1, label %.omp.reduction.case1
   i32 2, label %.omp.reduction.case2
   ]
 
 .omp.reduction.case1:                             ; preds = %entry
-  %tmp5 = load i32, i32* %a, align 4
-  %tmp6 = load i32, i32* %a1, align 4
+  %tmp5 = load i32, ptr %a, align 4
+  %tmp6 = load i32, ptr %a1, align 4
   %add = add nsw i32 %tmp5, %tmp6
-  store i32 %add, i32* %a, align 4
-  call void @__kmpc_end_reduce_nowait(%struct.ident_t* nonnull @2, i32 %tmp2, [8 x i32]* nonnull @.gomp_critical_user_.reduction.var)
+  store i32 %add, ptr %a, align 4
+  call void @__kmpc_end_reduce_nowait(ptr nonnull @2, i32 %tmp2, ptr nonnull @.gomp_critical_user_.reduction.var)
   br label %.omp.reduction.default
 
 .omp.reduction.case2:                             ; preds = %entry
-  %tmp7 = load i32, i32* %a1, align 4
-  %tmp8 = atomicrmw add i32* %a, i32 %tmp7 monotonic
+  %tmp7 = load i32, ptr %a1, align 4
+  %tmp8 = atomicrmw add ptr %a, i32 %tmp7 monotonic
   br label %.omp.reduction.default
 
 .omp.reduction.default:                           ; preds = %.omp.reduction.case2, %.omp.reduction.case1, %entry
-  %tmp9 = bitcast i32* %a1 to i8*
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp9)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %a1)
   ret void
 }
 
-define internal void @.omp.reduction.reduction_func(i8* %arg, i8* %arg1) {
+define internal void @.omp.reduction.reduction_func(ptr %arg, ptr %arg1) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
-; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {
+; CHECK-SAME: (ptr nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], ptr nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32**
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32**
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[TMP4]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
-; CHECK1-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {
+; CHECK1-SAME: (ptr nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], ptr nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {
 ; CHECK1-NEXT:  entry:
-; CHECK1-NEXT:    [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32**
-; CHECK1-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8
-; CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32**
-; CHECK1-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8
-; CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
-; CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8
+; CHECK1-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8
+; CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-; CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP4]], align 4
+; CHECK1-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 ; CHECK1-NEXT:    ret void
 ; CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
-; CHECK2-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {
+; CHECK2-SAME: (ptr nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], ptr nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32**
-; CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8
-; CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32**
-; CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8
-; CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
-; CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8
+; CHECK2-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8
+; CHECK2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
 ; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP4]], align 4
+; CHECK2-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 ; CHECK2-NEXT:    ret void
 entry:
-  %tmp = bitcast i8* %arg1 to i32**
-  %tmp2 = load i32*, i32** %tmp, align 8
-  %tmp3 = bitcast i8* %arg to i32**
-  %tmp4 = load i32*, i32** %tmp3, align 8
-  %tmp5 = load i32, i32* %tmp4, align 4
-  %tmp6 = load i32, i32* %tmp2, align 4
+  %tmp2 = load ptr, ptr %arg1, align 8
+  %tmp4 = load ptr, ptr %arg, align 8
+  %tmp5 = load i32, ptr %tmp4, align 4
+  %tmp6 = load i32, ptr %tmp2, align 4
   %add = add nsw i32 %tmp5, %tmp6
-  store i32 %add, i32* %tmp4, align 4
+  store i32 %add, ptr %tmp4, align 4
   ret void
 }
 
-declare i32 @__kmpc_single(%struct.ident_t*, i32)
+declare i32 @__kmpc_single(ptr, i32)
 
-declare void @__kmpc_end_single(%struct.ident_t*, i32)
+declare void @__kmpc_end_single(ptr, i32)
 
-declare void @__kmpc_barrier(%struct.ident_t*, i32)
+declare void @__kmpc_barrier(ptr, i32)
 
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) nofree nosync nounwind readonly
+declare i32 @__kmpc_global_thread_num(ptr) nofree nosync nounwind readonly
 
-declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
+declare i32 @__kmpc_reduce_nowait(ptr, i32, i32, i64, ptr, ptr, ptr)
 
-declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*)
+declare void @__kmpc_end_reduce_nowait(ptr, i32, ptr)
 
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
 
-declare !callback !2 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+declare !callback !2 void @__kmpc_fork_call(ptr, i32, ptr, ...)
 
 declare void @unknown()
 
diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll
index 54f27637d9ecda..bb764c2767f403 100644
--- a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll
@@ -4,10 +4,10 @@ source_filename = "parallel_deletion_remarks.c"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-linux-gnu"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8
 
 ; void delete_parallel(void) {
 ; #pragma omp parallel
@@ -26,14 +26,14 @@ target triple = "x86_64-pc-linux-gnu"
 ; CHECK: remark: parallel_deletion_remarks.c:12:1: Removing parallel region with no side-effects.
 ; CHECK: remark: parallel_deletion_remarks.c:14:1: Removing parallel region with no side-effects.
 define dso_local void @delete_parallel() local_unnamed_addr !dbg !15 {
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)), !dbg !18
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)), !dbg !19
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)), !dbg !20
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)), !dbg !21
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined.), !dbg !18
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..2), !dbg !19
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..4), !dbg !20
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, i32 0, ptr @.omp_outlined..6), !dbg !21
   ret void, !dbg !22
 }
 
-declare !callback !23 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr
+declare !callback !23 void @__kmpc_fork_call(ptr, i32, ptr, ...) local_unnamed_addr
 
 ; Function Attrs: willreturn
 declare !dbg !4 void @unknown_willreturn(...) #0
@@ -44,22 +44,22 @@ declare !dbg !7 void @readonly_willreturn(...) #1
 ; Function Attrs: readnone willreturn
 declare !dbg !8 void @readnone_willreturn(...) #2
 
-define internal void @.omp_outlined.(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !25 {
+define internal void @.omp_outlined.(ptr noalias nocapture readnone %0, ptr noalias nocapture readnone %1) !dbg !25 {
   call void (...) @unknown_willreturn(), !dbg !36
   ret void, !dbg !36
 }
 
-define internal void @.omp_outlined..2(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !37 {
+define internal void @.omp_outlined..2(ptr noalias nocapture readnone %0, ptr noalias nocapture readnone %1) !dbg !37 {
   call void (...) @readonly_willreturn(), !dbg !41
   ret void, !dbg !41
 }
 
-define internal void @.omp_outlined..4(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !42 {
+define internal void @.omp_outlined..4(ptr noalias nocapture readnone %0, ptr noalias nocapture readnone %1) !dbg !42 {
   call void (...) @readnone_willreturn(), !dbg !46
   ret void, !dbg !46
 }
 
-define internal void @.omp_outlined..6(i32* noalias nocapture %0, i32* noalias nocapture %1) !dbg !47 {
+define internal void @.omp_outlined..6(ptr noalias nocapture %0, ptr noalias nocapture %1) !dbg !47 {
   ret void, !dbg !51
 }
 
diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
index 898356842b189f..654f7b486283a3 100644
--- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
@@ -2,74 +2,74 @@
 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
 target triple = "nvptx64"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @no_spmd_exec_mode = weak constant i8 1
 @spmd_exec_mode = weak constant i8 0
 @parallel_exec_mode = weak constant i8 0
 @G = external global i8
-@llvm.compiler.used = appending global [3 x i8*] [i8* @no_spmd_exec_mode, i8* @spmd_exec_mode, i8* @parallel_exec_mode], section "llvm.metadata"
+@llvm.compiler.used = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"
 
 ;.
 ; CHECK: @[[NO_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; CHECK: @[[SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
 ; CHECK: @[[PARALLEL_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x i8*] [i8* @no_spmd_exec_mode, i8* @spmd_exec_mode, i8* @parallel_exec_mode], section "llvm.metadata"
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"
 ;.
 define weak void @none_spmd() {
 ; CHECK-LABEL: define {{[^@]+}}@none_spmd() {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
 ; CHECK-NEXT:    call void @none_spmd_helper()
 ; CHECK-NEXT:    call void @mixed_helper()
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 1)
 ; CHECK-NEXT:    ret void
 ;
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
   call void @none_spmd_helper()
   call void @mixed_helper()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+  call void @__kmpc_target_deinit(ptr null, i8 1)
   ret void
 }
 
 define weak void @spmd() {
 ; CHECK-LABEL: define {{[^@]+}}@spmd() {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
 ; CHECK-NEXT:    call void @spmd_helper()
 ; CHECK-NEXT:    call void @mixed_helper()
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 2)
 ; CHECK-NEXT:    ret void
 ;
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
   call void @spmd_helper()
   call void @mixed_helper()
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+  call void @__kmpc_target_deinit(ptr null, i8 2)
   ret void
 }
 
 define weak void @parallel() {
 ; CHECK-LABEL: define {{[^@]+}}@parallel() {
-; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
 ; CHECK-NEXT:    call void @spmd_helper()
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr null, i8 2)
 ; CHECK-NEXT:    ret void
 ;
-  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false)
+  %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
   call void @spmd_helper()
-  call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2)
+  call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
+  call void @__kmpc_target_deinit(ptr null, i8 2)
   ret void
 }
 
 define internal void @mixed_helper() {
 ; CHECK-LABEL: define {{[^@]+}}@mixed_helper() {
 ; CHECK-NEXT:    [[LEVEL:%.*]] = call i8 @__kmpc_parallel_level()
-; CHECK-NEXT:    store i8 [[LEVEL]], i8* @G, align 1
+; CHECK-NEXT:    store i8 [[LEVEL]], ptr @G, align 1
 ; CHECK-NEXT:    ret void
 ;
   %level = call i8 @__kmpc_parallel_level()
-  store i8 %level, i8* @G
+  store i8 %level, ptr @G
   ret void
 }
 
@@ -98,17 +98,17 @@ f:
 
 define internal void @spmd_helper() {
 ; CHECK-LABEL: define {{[^@]+}}@spmd_helper() {
-; CHECK-NEXT:    store i8 1, i8* @G, align 1
+; CHECK-NEXT:    store i8 1, ptr @G, align 1
 ; CHECK-NEXT:    ret void
 ;
   %level = call i8 @__kmpc_parallel_level()
-  store i8 %level, i8* @G
+  store i8 %level, ptr @G
   ret void
 }
 
-define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) {
+define internal void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) {
 ; CHECK-LABEL: define {{[^@]+}}@__kmpc_parallel_51
-; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], i8* [[TMP5:%.*]], i8* [[TMP6:%.*]], i8** [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]], ptr [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    call void @parallel_helper()
 ; CHECK-NEXT:    ret void
 ;
@@ -119,34 +119,34 @@ define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i
 define internal void @parallel_helper() {
 ; CHECK-LABEL: define {{[^@]+}}@parallel_helper() {
 ; CHECK-NEXT:    [[LEVEL:%.*]] = call i8 @__kmpc_parallel_level()
-; CHECK-NEXT:    store i8 [[LEVEL]], i8* @G, align 1
+; CHECK-NEXT:    store i8 [[LEVEL]], ptr @G, align 1
 ; CHECK-NEXT:    ret void
 ;
   %level = call i8 @__kmpc_parallel_level()
-  store i8 %level, i8* @G
+  store i8 %level, ptr @G
   ret void
 }
 
 declare void @foo()
 declare void @bar()
 declare i8 @__kmpc_parallel_level()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext) #1
-declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext) #1
+declare i32 @__kmpc_target_init(ptr, i8 zeroext, i1 zeroext) #1
+declare void @__kmpc_target_deinit(ptr nocapture readnone, i8 zeroext) #1
 
 !llvm.module.flags = !{!0, !1}
 !nvvm.annotations = !{!2, !3, !4}
 
 !0 = !{i32 7, !"openmp", i32 50}
 !1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{void ()* @none_spmd, !"kernel", i32 1}
-!3 = !{void ()* @spmd, !"kernel", i32 1}
-!4 = !{void ()* @parallel, !"kernel", i32 1}
+!2 = !{ptr @none_spmd, !"kernel", i32 1}
+!3 = !{ptr @spmd, !"kernel", i32 1}
+!4 = !{ptr @parallel, !"kernel", i32 1}
 ;.
 ; CHECK: attributes #[[ATTR0]] = { alwaysinline }
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{void ()* @none_spmd, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{void ()* @spmd, !"kernel", i32 1}
-; CHECK: [[META4:![0-9]+]] = !{void ()* @parallel, !"kernel", i32 1}
+; CHECK: [[META2:![0-9]+]] = !{ptr @none_spmd, !"kernel", i32 1}
+; CHECK: [[META3:![0-9]+]] = !{ptr @spmd, !"kernel", i32 1}
+; CHECK: [[META4:![0-9]+]] = !{ptr @parallel, !"kernel", i32 1}
 ;.
diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
index f25003218eb481..1ab939b547bb17 100644
--- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
@@ -57,7 +57,7 @@
 ;     }
 ;     use(a);
 ; }
-; void merge_seq_float(float f, float *p) {
+; void merge_seq_float(float f, ptr p) {
 ; #pragma omp parallel
 ;     {
 ;         use(f);
@@ -228,34 +228,34 @@
 ; }
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 
 define dso_local void @merge(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined., ptr nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..1, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined.(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined.(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
 declare dso_local void @use(i32) local_unnamed_addr
 
-declare !callback !1 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr
+declare !callback !1 void @__kmpc_fork_call(ptr, i32, ptr, ...) local_unnamed_addr
 
-define internal void @.omp_outlined..1(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..1(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -263,28 +263,28 @@ entry:
 define dso_local void @unmergable_proc_bind(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1)
-  store i32 %a, i32* %a.addr, align 4
-  call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+  store i32 %a, ptr %a.addr, align 4
+  call void @__kmpc_push_proc_bind(ptr nonnull @1, i32 %0, i32 3)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..2, ptr nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..3, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined..2(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..2(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr
+declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr
 
-declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) local_unnamed_addr
+declare void @__kmpc_push_proc_bind(ptr, i32, i32) local_unnamed_addr
 
-define internal void @.omp_outlined..3(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..3(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -292,26 +292,26 @@ entry:
 define dso_local void @unmergable_num_threads(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1)
-  store i32 %a, i32* %a.addr, align 4
-  call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %a)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+  store i32 %a, ptr %a.addr, align 4
+  call void @__kmpc_push_num_threads(ptr nonnull @1, i32 %0, i32 %a)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..4, ptr nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..5, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined..4(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..4(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) local_unnamed_addr
+declare void @__kmpc_push_num_threads(ptr, i32, i32) local_unnamed_addr
 
-define internal void @.omp_outlined..5(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..5(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -319,25 +319,25 @@ entry:
 define dso_local void @unmergable_seq_call(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..6, ptr nonnull %a.addr)
   call void (...) @foo()
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..7, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined..6(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..6(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
 declare dso_local void @foo(...) local_unnamed_addr
 
-define internal void @.omp_outlined..7(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..7(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -345,54 +345,54 @@ entry:
 define dso_local void @merge_seq(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  %0 = load i32, i32* %a.addr, align 4
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..8, ptr nonnull %a.addr)
+  %0 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %0, 1
-  store i32 %add, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  %1 = load i32, i32* %a.addr, align 4
+  store i32 %add, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..9, ptr nonnull %a.addr)
+  %1 = load i32, ptr %a.addr, align 4
   call void @use(i32 %1)
   ret void
 }
 
-define internal void @.omp_outlined..8(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..8(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..9(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..9(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define dso_local void @merge_seq_float(float %f, float* nocapture %p) local_unnamed_addr  {
+define dso_local void @merge_seq_float(float %f, ptr nocapture %p) local_unnamed_addr  {
 entry:
   %f.addr = alloca float, align 4
-  store float %f, float* %f.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), float* nonnull %f.addr)
-  %0 = load float, float* %f.addr, align 4
+  store float %f, ptr %f.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..10, ptr nonnull %f.addr)
+  %0 = load float, ptr %f.addr, align 4
   %add = fadd float %0, 0x40091EB860000000
-  store float %add, float* %p, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), float* nonnull %f.addr)
+  store float %add, ptr %p, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..11, ptr nonnull %f.addr)
   ret void
 }
 
-define internal void @.omp_outlined..10(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., float* nocapture nonnull readonly align 4 dereferenceable(4) %f)  {
+define internal void @.omp_outlined..10(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %f)  {
 entry:
-  %0 = load float, float* %f, align 4
+  %0 = load float, ptr %f, align 4
   %conv = fptosi float %0 to i32
   call void @use(i32 %conv)
   ret void
 }
 
-define internal void @.omp_outlined..11(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., float* nocapture nonnull readonly align 4 dereferenceable(4) %f)  {
+define internal void @.omp_outlined..11(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %f)  {
 entry:
-  %0 = load float, float* %f, align 4
+  %0 = load float, ptr %f, align 4
   %conv = fptosi float %0 to i32
   call void @use(i32 %conv)
   ret void
@@ -401,26 +401,26 @@ entry:
 define dso_local void @merge_seq_firstprivate(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  %0 = load i32, i32* %a.addr, align 4
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..12, ptr nonnull %a.addr)
+  %0 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %0, 1
-  store i32 %add, i32* %a.addr, align 4
+  store i32 %add, ptr %a.addr, align 4
   %a.casted.sroa.0.0.insert.ext = zext i32 %add to i64
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 %a.casted.sroa.0.0.insert.ext)
-  %1 = load i32, i32* %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..13, i64 %a.casted.sroa.0.0.insert.ext)
+  %1 = load i32, ptr %a.addr, align 4
   call void @use(i32 %1)
   ret void
 }
 
-define internal void @.omp_outlined..12(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..12(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..13(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i64 %a)  {
+define internal void @.omp_outlined..13(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., i64 %a)  {
 entry:
   %a.addr.sroa.0.0.extract.trunc = trunc i64 %a to i32
   call void @use(i32 %a.addr.sroa.0.0.extract.trunc)
@@ -431,32 +431,31 @@ define dso_local void @merge_seq_sink_lt(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
   %b = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  %0 = bitcast i32* %b to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
-  %1 = ptrtoint i32* %b to i64
-  %2 = trunc i64 %1 to i32
-  store i32 %2, i32* %b, align 4
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..14, ptr nonnull %a.addr)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %b)
+  %0 = ptrtoint ptr %b to i64
+  %1 = trunc i64 %0 to i32
+  store i32 %1, ptr %b, align 4
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %b)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..15, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined..14(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..14(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
 
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
 
-define internal void @.omp_outlined..15(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..15(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -465,30 +464,29 @@ define dso_local void @merge_seq_par_use(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
   %b = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  %0 = bitcast i32* %b to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
-  %1 = load i32, i32* %a.addr, align 4
-  %add = add nsw i32 %1, 1
-  store i32 %add, i32* %b, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* nonnull %a.addr, i32* nonnull %b)
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..16, ptr nonnull %a.addr)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %b)
+  %0 = load i32, ptr %a.addr, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, ptr %b, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 2, ptr @.omp_outlined..17, ptr nonnull %a.addr, ptr nonnull %b)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %b)
   ret void
 }
 
-define internal void @.omp_outlined..16(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..16(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..17(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a, i32* nocapture nonnull readonly align 4 dereferenceable(4) %b)  {
+define internal void @.omp_outlined..17(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a, ptr nocapture nonnull readonly align 4 dereferenceable(4) %b)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
-  %1 = load i32, i32* %b, align 4
+  %1 = load i32, ptr %b, align 4
   call void @use(i32 %1)
   ret void
 }
@@ -497,39 +495,39 @@ define dso_local void @merge_cancellable_regions(i32 %cancel1, i32 %cancel2) loc
 entry:
   %cancel1.addr = alloca i32, align 4
   %cancel2.addr = alloca i32, align 4
-  store i32 %cancel1, i32* %cancel1.addr, align 4
-  store i32 %cancel2, i32* %cancel2.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* nonnull %cancel1.addr)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32* nonnull %cancel2.addr)
+  store i32 %cancel1, ptr %cancel1.addr, align 4
+  store i32 %cancel2, ptr %cancel2.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..18, ptr nonnull %cancel1.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..19, ptr nonnull %cancel2.addr)
   ret void
 }
 
-define internal void @.omp_outlined..18(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel1)  {
+define internal void @.omp_outlined..18(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel1)  {
 entry:
-  %0 = load i32, i32* %cancel1, align 4
+  %0 = load i32, ptr %cancel1, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
-  %1 = load i32, i32* %.global_tid., align 4
-  %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1)
+  %1 = load i32, ptr %.global_tid., align 4
+  %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1)
   ret void
 
 if.end:                                           ; preds = %entry
   ret void
 }
 
-declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) local_unnamed_addr
+declare i32 @__kmpc_cancel(ptr, i32, i32) local_unnamed_addr
 
-define internal void @.omp_outlined..19(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel2)  {
+define internal void @.omp_outlined..19(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel2)  {
 entry:
-  %0 = load i32, i32* %cancel2, align 4
+  %0 = load i32, ptr %cancel2, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
-  %1 = load i32, i32* %.global_tid., align 4
-  %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1)
+  %1 = load i32, ptr %.global_tid., align 4
+  %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1)
   ret void
 
 if.end:                                           ; preds = %entry
@@ -540,41 +538,41 @@ define dso_local void @merge_cancellable_regions_seq(i32 %cancel1, i32 %cancel2)
 entry:
   %cancel1.addr = alloca i32, align 4
   %cancel2.addr = alloca i32, align 4
-  store i32 %cancel1, i32* %cancel1.addr, align 4
-  store i32 %cancel2, i32* %cancel2.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i32* nonnull %cancel1.addr)
-  %0 = load i32, i32* %cancel1.addr, align 4
+  store i32 %cancel1, ptr %cancel1.addr, align 4
+  store i32 %cancel2, ptr %cancel2.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..20, ptr nonnull %cancel1.addr)
+  %0 = load i32, ptr %cancel1.addr, align 4
   %tobool.not = icmp eq i32 %0, 0
   %lnot.ext = zext i1 %tobool.not to i32
-  store i32 %lnot.ext, i32* %cancel2.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..21 to void (i32*, i32*, ...)*), i32* nonnull %cancel2.addr)
+  store i32 %lnot.ext, ptr %cancel2.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..21, ptr nonnull %cancel2.addr)
   ret void
 }
 
-define internal void @.omp_outlined..20(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel1)  {
+define internal void @.omp_outlined..20(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel1)  {
 entry:
-  %0 = load i32, i32* %cancel1, align 4
+  %0 = load i32, ptr %cancel1, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
-  %1 = load i32, i32* %.global_tid., align 4
-  %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1)
+  %1 = load i32, ptr %.global_tid., align 4
+  %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1)
   ret void
 
 if.end:                                           ; preds = %entry
   ret void
 }
 
-define internal void @.omp_outlined..21(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %cancel2)  {
+define internal void @.omp_outlined..21(ptr noalias nocapture readonly %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %cancel2)  {
 entry:
-  %0 = load i32, i32* %cancel2, align 4
+  %0 = load i32, ptr %cancel2, align 4
   %tobool.not = icmp eq i32 %0, 0
   br i1 %tobool.not, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
-  %1 = load i32, i32* %.global_tid., align 4
-  %2 = call i32 @__kmpc_cancel(%struct.ident_t* nonnull @1, i32 %1, i32 1)
+  %1 = load i32, ptr %.global_tid., align 4
+  %2 = call i32 @__kmpc_cancel(ptr nonnull @1, i32 %1, i32 1)
   ret void
 
 if.end:                                           ; preds = %entry
@@ -584,30 +582,30 @@ if.end:                                           ; preds = %entry
 define dso_local void @merge_3(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..22, ptr nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..23, ptr nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..24, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined..22(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..22(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..23(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..23(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..24(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..24(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -615,35 +613,35 @@ entry:
 define dso_local void @merge_3_seq(i32 %a, i32 %b) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  %0 = load i32, i32* %a.addr, align 4
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..25, ptr nonnull %a.addr)
+  %0 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %0, 1
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  %1 = load i32, i32* %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..26, ptr nonnull %a.addr)
+  %1 = load i32, ptr %a.addr, align 4
   %add1 = add nsw i32 %add, %1
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..27, ptr nonnull %a.addr)
   call void @use(i32 %add1)
   ret void
 }
 
-define internal void @.omp_outlined..25(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..25(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..26(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..26(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..27(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..27(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -651,32 +649,32 @@ entry:
 define dso_local void @unmergable_3_seq_call(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..28, ptr nonnull %a.addr)
   call void (...) @foo()
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..29, ptr nonnull %a.addr)
   call void (...) @foo()
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..30, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined..28(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..28(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..29(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..29(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..30(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..30(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -684,32 +682,32 @@ entry:
 define dso_local void @unmergable_3_proc_bind(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1)
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  call void @__kmpc_push_proc_bind(%struct.ident_t* nonnull @1, i32 %0, i32 3)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..31, ptr nonnull %a.addr)
+  call void @__kmpc_push_proc_bind(ptr nonnull @1, i32 %0, i32 3)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..32, ptr nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..33, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined..31(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..31(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..32(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..32(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..33(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..33(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -717,33 +715,33 @@ entry:
 define dso_local void @unmergable_3_num_threads(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1)
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  %1 = load i32, i32* %a.addr, align 4
-  call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @1, i32 %0, i32 %1)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..34, ptr nonnull %a.addr)
+  %1 = load i32, ptr %a.addr, align 4
+  call void @__kmpc_push_num_threads(ptr nonnull @1, i32 %0, i32 %1)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..35, ptr nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..36, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined..34(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..34(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..35(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..35(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..36(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..36(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -751,31 +749,31 @@ entry:
 define dso_local void @merge_2_unmergable_1(i32 %a) local_unnamed_addr  {
 entry:
   %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..37 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..38 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  store i32 %a, ptr %a.addr, align 4
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..37, ptr nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..38, ptr nonnull %a.addr)
   call void (...) @foo()
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nonnull %a.addr)
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr @.omp_outlined..39, ptr nonnull %a.addr)
   ret void
 }
 
-define internal void @.omp_outlined..37(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..37(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..38(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..38(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
 
-define internal void @.omp_outlined..39(i32* noalias nocapture readnone %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
+define internal void @.omp_outlined..39(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture nonnull readonly align 4 dereferenceable(4) %a)  {
 entry:
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4
   call void @use(i32 %0)
   ret void
 }
@@ -791,11 +789,11 @@ entry:
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -804,22 +802,22 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -828,126 +826,126 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -958,36 +956,36 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
-; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
-; CHECK-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -996,31 +994,31 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1031,26 +1029,26 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
-; CHECK-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT:    store float [[ADD]], ptr [[P]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
@@ -1059,47 +1057,47 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1110,25 +1108,25 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
-; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
@@ -1137,11 +1135,11 @@ entry:
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1150,32 +1148,32 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1186,28 +1184,27 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
-; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
-; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
@@ -1215,48 +1212,46 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1267,28 +1262,27 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
@@ -1296,12 +1290,12 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1310,22 +1304,22 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1334,26 +1328,26 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
@@ -1362,12 +1356,12 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1376,31 +1370,31 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1411,37 +1405,37 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
-; CHECK-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
@@ -1449,11 +1443,11 @@ entry:
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1462,25 +1456,25 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
-; CHECK-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1489,21 +1483,21 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
@@ -1512,58 +1506,58 @@ entry:
 ; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK:       omp_region.end4:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1574,143 +1568,143 @@ entry:
 ; CHECK:       omp_region.body5:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK:       seq.par.merged2:
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
-; CHECK-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
 ; CHECK:       omp_region.body5.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END4]]
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1718,25 +1712,25 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1745,32 +1739,32 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1779,22 +1773,22 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1803,126 +1797,126 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1933,36 +1927,36 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
-; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
-; CHECK-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1971,31 +1965,31 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2006,26 +2000,26 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
-; CHECK-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT:    store float [[ADD]], ptr [[P]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
@@ -2034,47 +2028,47 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2085,25 +2079,25 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
-; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
@@ -2112,11 +2106,11 @@ entry:
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -2125,32 +2119,32 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2161,28 +2155,27 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
-; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
-; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
@@ -2190,48 +2183,46 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2242,28 +2233,27 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
@@ -2271,12 +2261,12 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -2285,22 +2275,22 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2309,26 +2299,26 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
@@ -2337,12 +2327,12 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -2351,31 +2341,31 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2386,37 +2376,37 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
-; CHECK-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
@@ -2424,11 +2414,11 @@ entry:
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -2437,25 +2427,25 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
-; CHECK-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2464,21 +2454,21 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
@@ -2487,58 +2477,58 @@ entry:
 ; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK:       omp_region.end4:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2549,143 +2539,143 @@ entry:
 ; CHECK:       omp_region.body5:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK:       seq.par.merged2:
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
-; CHECK-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
 ; CHECK:       omp_region.body5.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END4]]
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -2693,25 +2683,25 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2720,32 +2710,32 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -2754,22 +2744,22 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2778,126 +2768,126 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2908,36 +2898,36 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
-; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
-; CHECK-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -2946,31 +2936,31 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -2981,26 +2971,26 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
-; CHECK-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT:    store float [[ADD]], ptr [[P]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
@@ -3009,47 +2999,47 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3060,25 +3050,25 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
-; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
@@ -3087,11 +3077,11 @@ entry:
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -3100,32 +3090,32 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3136,28 +3126,27 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
-; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
-; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
@@ -3165,48 +3154,46 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3217,28 +3204,27 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
@@ -3246,12 +3232,12 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -3260,22 +3246,22 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3284,26 +3270,26 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
@@ -3312,12 +3298,12 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -3326,31 +3312,31 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3361,37 +3347,37 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
-; CHECK-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
@@ -3399,11 +3385,11 @@ entry:
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -3412,25 +3398,25 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
-; CHECK-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3439,21 +3425,21 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
@@ -3462,58 +3448,58 @@ entry:
 ; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK:       omp_region.end4:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3524,143 +3510,143 @@ entry:
 ; CHECK:       omp_region.body5:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK:       seq.par.merged2:
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
-; CHECK-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
 ; CHECK:       omp_region.body5.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END4]]
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -3668,25 +3654,25 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3695,32 +3681,32 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -3729,22 +3715,22 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3753,126 +3739,126 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3883,36 +3869,36 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
-; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
-; CHECK-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_float..omp_par, ptr [[F_ADDR]], ptr [[P]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -3921,31 +3907,31 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[F_ADDR:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -3956,26 +3942,26 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
-; CHECK-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT:    store float [[ADD]], ptr [[P]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
@@ -3984,47 +3970,47 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_firstprivate..omp_par, ptr [[A_ADDR]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    call void @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4035,25 +4021,25 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
-; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
@@ -4062,11 +4048,11 @@ entry:
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4075,32 +4061,32 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4111,28 +4097,27 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
-; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
-; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
@@ -4140,48 +4125,46 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_seq_par_use..omp_par, ptr [[A_ADDR]], ptr [[B]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    call void @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4192,28 +4175,27 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[B]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
@@ -4221,12 +4203,12 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4235,22 +4217,22 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4259,26 +4241,26 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
@@ -4287,12 +4269,12 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @merge_cancellable_regions_seq..omp_par, ptr [[CANCEL1_ADDR]], ptr [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4301,31 +4283,31 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[CANCEL1_ADDR:%.*]], ptr [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4336,37 +4318,37 @@ entry:
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[CANCEL1_ADDR]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
-; CHECK-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[LNOT_EXT]], ptr [[CANCEL2_ADDR]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
-; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
@@ -4374,11 +4356,11 @@ entry:
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_3..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4387,25 +4369,25 @@ entry:
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
-; CHECK-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    call void @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4414,21 +4396,21 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
@@ -4437,58 +4419,58 @@ entry:
 ; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @merge_3_seq..omp_par, ptr [[A_ADDR]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK:       omp.par.exit.split:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
-; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]], ptr [[ADD_SEQ_OUTPUT_ALLOC:%.*]], ptr [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK:       omp_region.end4:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4499,143 +4481,143 @@ entry:
 ; CHECK:       omp_region.body5:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK:       seq.par.merged2:
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
-; CHECK-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    store i32 [[ADD1]], ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
 ; CHECK:       omp_region.body5.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END4]]
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK:       seq.par.merged:
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4643,25 +4625,25 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK:       omp.par.outlined.exit.exitStub:
 ; CHECK-NEXT:    ret void
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4670,36 +4652,36 @@ entry:
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
 
 ; CHECK2-LABEL: define {{[^@]+}}@merge
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4710,22 +4692,22 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined.(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..1(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4738,17 +4720,17 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -4757,26 +4739,26 @@ entry:
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]])
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..2, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..3, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -4785,26 +4767,26 @@ entry:
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..4, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..5, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -4813,25 +4795,25 @@ entry:
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..6, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    call void (...) @foo()
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..7, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -4839,52 +4821,52 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.exit.split:
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK2:       entry.split.split:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..8(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..9(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4895,53 +4877,53 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4
 ; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[ADD]], ptr [[LOADGEP_A_ADDR]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float
-; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK2-SAME: (float [[F:%.*]], ptr nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8
 ; CHECK2-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
 ; CHECK2-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
-; CHECK2-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    store float [[F]], float* [[F_RELOADED]], align 4
+; CHECK2-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    store float [[F]], ptr [[F_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1
-; CHECK2-NEXT:    store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2
-; CHECK2-NEXT:    store float* [[P]], float** [[GEP_P]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[F_RELOADED]], ptr [[GEP_F_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store ptr [[F_ADDR]], ptr [[GEP_F_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    store ptr [[P]], ptr [[GEP_P]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_float..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4952,36 +4934,36 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1
-; CHECK2-NEXT:    [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2
-; CHECK2-NEXT:    [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8
+; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_F_RELOADED:%.*]] = load ptr, ptr [[GEP_F_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_F_ADDR:%.*]] = load ptr, ptr [[GEP_F_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[LOADGEP_P:%.*]] = load ptr, ptr [[GEP_P]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = load float, ptr [[LOADGEP_F_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, float*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..10(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 ; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, float*)* @.omp_outlined..11 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..11(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4993,30 +4975,30 @@ entry:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
 ; CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
-; CHECK2-NEXT:    store float [[ADD]], float* [[LOADGEP_P]], align 4
+; CHECK2-NEXT:    store float [[ADD]], ptr [[LOADGEP_P]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK2-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load float, ptr [[F]], align 4
 ; CHECK2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK2-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK2-NEXT:    ret void
@@ -5025,58 +5007,58 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i64* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8
 ; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 1
-; CHECK2-NEXT:    store i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i64* }*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), { i32*, i64* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store ptr [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], ptr [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_firstprivate..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.exit.split:
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK2:       entry.split.split:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i64* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 1
-; CHECK2-NEXT:    [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load i64*, i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..12(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, ptr [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..13(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5087,31 +5069,31 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4
 ; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[ADD]], ptr [[LOADGEP_A_ADDR]], align 4
 ; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
-; CHECK2-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], ptr [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
 ; CHECK2-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
@@ -5121,15 +5103,15 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_sink_lt..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5140,32 +5122,32 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
 ; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..14(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..15(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5176,34 +5158,33 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]])
-; CHECK2-NEXT:    [[TMP5:%.*]] = ptrtoint i32* [[B]] to i64
-; CHECK2-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK2-NEXT:    store i32 [[TMP6]], i32* [[B]], align 4
-; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]])
+; CHECK2-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[B]])
+; CHECK2-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK2-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK2-NEXT:    store i32 [[TMP5]], ptr [[B]], align 4
+; CHECK2-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -5211,66 +5192,64 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8
 ; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
-; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
-; CHECK2-NEXT:    store i32* [[B]], i32** [[GEP_B]], align 8
-; CHECK2-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[A_RELOADED]], ptr [[GEP_A_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    store ptr [[B]], ptr [[GEP_B]], align 8
+; CHECK2-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr [[B]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_seq_par_use..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.exit.split:
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK2:       entry.split.split:
-; CHECK2-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK2-NEXT:    call void @llvm.lifetime.end.p0(i64 noundef 4, ptr noundef nonnull [[B]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
-; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
-; CHECK2-NEXT:    [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8
+; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[LOADGEP_B:%.*]] = load ptr, ptr [[GEP_B]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[LOADGEP_A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..16(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 ; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_B]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..17(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_B]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5281,33 +5260,32 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
 ; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_B]], align 4
+; CHECK2-NEXT:    store i32 [[ADD]], ptr [[LOADGEP_B]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..17
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP1]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -5315,19 +5293,19 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions
 ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8
 ; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
-; CHECK2-NEXT:    store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[CANCEL1_ADDR]], ptr [[GEP_CANCEL1_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store ptr [[CANCEL2_ADDR]], ptr [[GEP_CANCEL2_ADDR]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_cancellable_regions..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5338,24 +5316,24 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
-; CHECK2-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..18 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..19 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..18(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..19(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5368,28 +5346,28 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..18
-; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK2:       if.then:
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK2-NEXT:    ret void
 ; CHECK2:       if.end:
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..19
-; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK2:       if.then:
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK2-NEXT:    ret void
 ; CHECK2:       if.end:
 ; CHECK2-NEXT:    ret void
@@ -5398,23 +5376,23 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
 ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8
 ; CHECK2-NEXT:    [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
+; CHECK2-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[CANCEL2]], ptr [[CANCEL2_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    store i32 [[CANCEL1]], ptr [[CANCEL1_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
-; CHECK2-NEXT:    store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
-; CHECK2-NEXT:    store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[CANCEL1_RELOADED]], ptr [[GEP_CANCEL1_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store ptr [[CANCEL1_ADDR]], ptr [[GEP_CANCEL1_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    store ptr [[CANCEL2_ADDR]], ptr [[GEP_CANCEL2_ADDR]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_cancellable_regions_seq..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5425,36 +5403,36 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
-; CHECK2-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
-; CHECK2-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_CANCEL1_RELOADED:%.*]] = load ptr, ptr [[GEP_CANCEL1_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL1_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load ptr, ptr [[GEP_CANCEL2_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[LOADGEP_CANCEL1_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..20 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..20(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 ; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..21 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..21(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5467,40 +5445,40 @@ entry:
 ; CHECK2:       seq.par.merged:
 ; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK2-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
-; CHECK2-NEXT:    store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[LNOT_EXT]], ptr [[LOADGEP_CANCEL2_ADDR]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..20
-; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL1]], align 4
 ; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK2:       if.then:
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK2-NEXT:    ret void
 ; CHECK2:       if.end:
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..21
-; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CANCEL2]], align 4
 ; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK2:       if.then:
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(ptr noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK2-NEXT:    ret void
 ; CHECK2:       if.end:
 ; CHECK2-NEXT:    ret void
@@ -5509,15 +5487,15 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_3..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_3..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5528,25 +5506,25 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..22 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..23 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..24 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..22(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..23(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..24(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5559,25 +5537,25 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..22
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..23
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -5585,81 +5563,81 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq
 ; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr, ptr }, align 8
 ; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
-; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
-; CHECK2-NEXT:    store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3
-; CHECK2-NEXT:    store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[A_RELOADED]], ptr [[GEP_A_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    store ptr [[ADD_SEQ_OUTPUT_ALLOC]], ptr [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 3
+; CHECK2-NEXT:    store ptr [[ADD1_SEQ_OUTPUT_ALLOC]], ptr [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_3_seq..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.exit.split:
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK2:       entry.split.split:
-; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
-; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
-; CHECK2-NEXT:    [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
-; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3
-; CHECK2-NEXT:    [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load ptr, ptr [[GEP_A_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { ptr, ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 3
+; CHECK2-NEXT:    [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load ptr, ptr [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, ptr [[LOADGEP_A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..25 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..25(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 ; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..26 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..26(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK2-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
 ; CHECK2-NEXT:    br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK2:       omp_region.end4:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split.split.split:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..27 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..27(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5670,50 +5648,50 @@ entry:
 ; CHECK2:       omp_region.body5:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK2:       seq.par.merged2:
-; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
-; CHECK2-NEXT:    store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    store i32 [[ADD1]], ptr [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
 ; CHECK2:       omp_region.body5.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END4]]
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
 ; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    store i32 [[ADD]], ptr [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..25
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..26
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..27
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -5722,35 +5700,35 @@ entry:
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..28, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    call void (...) @foo()
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..29, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    call void (...) @foo()
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..30, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..28
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..29
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..30
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -5759,35 +5737,35 @@ entry:
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..31, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @__kmpc_push_proc_bind(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..32, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..33, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..31
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..32
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..33
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -5796,35 +5774,35 @@ entry:
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..34, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @__kmpc_push_num_threads(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..35, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..36, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..34
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..35
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..36
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
@@ -5832,15 +5810,15 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
-; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store ptr [[A_ADDR]], ptr [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @merge_2_unmergable_1..omp_par, ptr [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5848,27 +5826,27 @@ entry:
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK2:       entry.split.split:
 ; CHECK2-NEXT:    call void (...) @foo()
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, ptr noundef @.omp_outlined..39, ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
-; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
-; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { ptr }, ptr [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..37 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    call void (i32*, i32*, ...) bitcast (void (i32*, i32*, i32*)* @.omp_outlined..38 to void (i32*, i32*, ...)*)(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..37(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void (ptr, ptr, ...) @.omp_outlined..38(ptr [[TID_ADDR]], ptr [[ZERO_ADDR]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5881,25 +5859,25 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..37
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..38
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..39
-; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-SAME: (ptr noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK2-NEXT:  entry:
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 8d8bb5197d94eb..046f39ebed5040 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -12,57 +12,57 @@ target triple = "nvptx64"
 ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
 ; UTC_ARGS: --enable
 
-@S = external local_unnamed_addr global i8*
+@S = external local_unnamed_addr global ptr
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
 ;.
-; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
+; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
 ;.
-; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
+; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
 ;.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
 ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; CHECK-NEXT:    ret i32 0
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; CHECK-DISABLED-NEXT:    ret i32 0
 ;
   ret i32 0
 }
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 define void @kernel() {
 ; CHECK-LABEL: define {{[^@]+}}@kernel() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false)
 ; CHECK-NEXT:    call void @foo() #[[ATTR0:[0-9]+]]
 ; CHECK-NEXT:    call void @bar() #[[ATTR0]]
 ; CHECK-NEXT:    call void @convert_and_move_alloca() #[[ATTR0]]
 ; CHECK-NEXT:    call void @unknown_no_openmp()
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
 ; CHECK-DISABLED-NEXT:  entry:
-; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false)
+; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false)
 ; CHECK-DISABLED-NEXT:    call void @foo() #[[ATTR0:[0-9]+]]
 ; CHECK-DISABLED-NEXT:    call void @bar() #[[ATTR0]]
 ; CHECK-DISABLED-NEXT:    call void @convert_and_move_alloca() #[[ATTR0]]
 ; CHECK-DISABLED-NEXT:    call void @unknown_no_openmp()
-; CHECK-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
+; CHECK-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
 ; CHECK-DISABLED-NEXT:    ret void
 ;
 entry:
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true)
+  %0 = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 true)
   call void @foo()
   call void @bar()
   call void @convert_and_move_alloca()
   call void @unknown_no_openmp()
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1)
+  call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
   ret void
 }
 
@@ -80,9 +80,9 @@ define internal void @foo() {
 ; CHECK-DISABLED-NEXT:    ret void
 ;
 entry:
-  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12
-  call void @use(i8* %0)
-  call void @__kmpc_free_shared(i8* %0, i64 4)
+  %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !12
+  call void @use(ptr %0)
+  call void @__kmpc_free_shared(ptr %0, i64 4)
   ret void
 }
 
@@ -90,34 +90,34 @@ define internal void @bar() {
 ; CHECK-LABEL: define {{[^@]+}}@bar
 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
-; CHECK-NEXT:    call void @share(i8* nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
-; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT:    call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
+; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]]
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar
 ; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
 ; CHECK-DISABLED-NEXT:  entry:
-; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
-; CHECK-DISABLED-NEXT:    call void @share(i8* nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
-; CHECK-DISABLED-NEXT:    call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]]
+; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
+; CHECK-DISABLED-NEXT:    call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
+; CHECK-DISABLED-NEXT:    call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]]
 ; CHECK-DISABLED-NEXT:    ret void
 ;
 entry:
-  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !13
-  call void @share(i8* %0), !dbg !13
-  call void @__kmpc_free_shared(i8* %0, i64 4)
+  %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !13
+  call void @share(ptr %0), !dbg !13
+  call void @__kmpc_free_shared(ptr %0, i64 4)
   ret void
 }
 
-define internal void @use(i8* %x) {
+define internal void @use(ptr %x) {
 ; CHECK-LABEL: define {{[^@]+}}@use
-; CHECK-SAME: (i8* [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@use
-; CHECK-DISABLED-SAME: (i8* [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-DISABLED-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-DISABLED-NEXT:  entry:
 ; CHECK-DISABLED-NEXT:    ret void
 ;
@@ -125,21 +125,21 @@ entry:
   ret void
 }
 
-define internal void @share(i8* %x) {
+define internal void @share(ptr %x) {
 ; CHECK-LABEL: define {{[^@]+}}@share
-; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i8* [[X]], i8** @S, align 8
+; CHECK-NEXT:    store ptr [[X]], ptr @S, align 8
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@share
-; CHECK-DISABLED-SAME: (i8* nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-DISABLED-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CHECK-DISABLED-NEXT:  entry:
-; CHECK-DISABLED-NEXT:    store i8* [[X]], i8** @S, align 8
+; CHECK-DISABLED-NEXT:    store ptr [[X]], ptr @S, align 8
 ; CHECK-DISABLED-NEXT:    ret void
 ;
 entry:
-  store i8* %x, i8** @S
+  store ptr %x, ptr @S
   ret void
 }
 
@@ -147,20 +147,20 @@ define void @unused() {
 ; CHECK-LABEL: define {{[^@]+}}@unused() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
-; CHECK-NEXT:    call void @use(i8* undef)
+; CHECK-NEXT:    call void @use(ptr undef)
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() {
 ; CHECK-DISABLED-NEXT:  entry:
-; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4), !dbg [[DBG11:![0-9]+]]
-; CHECK-DISABLED-NEXT:    call void @use(i8* [[TMP0]])
-; CHECK-DISABLED-NEXT:    call void @__kmpc_free_shared(i8* [[TMP0]], i64 4)
+; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4), !dbg [[DBG11:![0-9]+]]
+; CHECK-DISABLED-NEXT:    call void @use(ptr [[TMP0]])
+; CHECK-DISABLED-NEXT:    call void @__kmpc_free_shared(ptr [[TMP0]], i64 4)
 ; CHECK-DISABLED-NEXT:    ret void
 ;
 entry:
-  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !14
-  call void @use(i8* %0)
-  call void @__kmpc_free_shared(i8* %0, i64 4)
+  %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !14
+  call void @use(ptr %0)
+  call void @__kmpc_free_shared(ptr %0, i64 4)
   ret void
 }
 
@@ -172,15 +172,15 @@ define internal void @convert_and_move_alloca() {
 ; CHECK-NEXT:    [[IV_PTR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    br label [[INITLOOP:%.*]]
 ; CHECK:       initloop:
-; CHECK-NEXT:    store i32 0, i32* [[IV_PTR]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[IV_PTR]], align 4
 ; CHECK-NEXT:    br label [[LOOPBODY:%.*]]
 ; CHECK:       loopbody:
-; CHECK-NEXT:    [[IV:%.*]] = load i32, i32* [[IV_PTR]], align 4
+; CHECK-NEXT:    [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
 ; CHECK:       loopinc:
 ; CHECK-NEXT:    [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-NEXT:    store i32 [[INC]], i32* [[IV_PTR]], align 4
+; CHECK-NEXT:    store i32 [[INC]], ptr [[IV_PTR]], align 4
 ; CHECK-NEXT:    br label [[LOOPBODY]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
@@ -192,15 +192,15 @@ define internal void @convert_and_move_alloca() {
 ; CHECK-DISABLED-NEXT:    [[IV_PTR:%.*]] = alloca i32, align 4
 ; CHECK-DISABLED-NEXT:    br label [[INITLOOP:%.*]]
 ; CHECK-DISABLED:       initloop:
-; CHECK-DISABLED-NEXT:    store i32 0, i32* [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT:    store i32 0, ptr [[IV_PTR]], align 4
 ; CHECK-DISABLED-NEXT:    br label [[LOOPBODY:%.*]]
 ; CHECK-DISABLED:       loopbody:
-; CHECK-DISABLED-NEXT:    [[IV:%.*]] = load i32, i32* [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT:    [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
 ; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
 ; CHECK-DISABLED-NEXT:    br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
 ; CHECK-DISABLED:       loopinc:
 ; CHECK-DISABLED-NEXT:    [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-DISABLED-NEXT:    store i32 [[INC]], i32* [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT:    store i32 [[INC]], ptr [[IV_PTR]], align 4
 ; CHECK-DISABLED-NEXT:    br label [[LOOPBODY]]
 ; CHECK-DISABLED:       exit:
 ; CHECK-DISABLED-NEXT:    ret void
@@ -208,36 +208,36 @@ define internal void @convert_and_move_alloca() {
 entry:
   %iv_ptr = alloca i32, align 4
   %ub_ptr = alloca i32, align 4
-  store i32 10, i32* %ub_ptr
+  store i32 10, ptr %ub_ptr
   br label %initloop
 
 initloop:
-  store i32 0, i32* %iv_ptr
-  %ub = load i32, i32* %ub_ptr
+  store i32 0, ptr %iv_ptr
+  %ub = load i32, ptr %ub_ptr
   br label %loopbody
 
 loopbody:
-  %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !16
-  call void @use(i8* %0)
-  call void @__kmpc_free_shared(i8* %0, i64 4)
-  %iv = load i32, i32* %iv_ptr
+  %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !16
+  call void @use(ptr %0)
+  call void @__kmpc_free_shared(ptr %0, i64 4)
+  %iv = load i32, ptr %iv_ptr
   %1 = icmp eq i32 %iv, %ub
   br i1 %1, label %exit, label %loopinc
 
 loopinc:
   %inc = add i32 %iv, 1
-  store i32 %inc, i32* %iv_ptr
+  store i32 %inc, ptr %iv_ptr
   br label %loopbody
 
 exit:
   ret void
 }
 
-; CHECK: declare i8* @__kmpc_alloc_shared(i64)
-declare i8* @__kmpc_alloc_shared(i64)
+; CHECK: declare ptr @__kmpc_alloc_shared(i64)
+declare ptr @__kmpc_alloc_shared(i64)
 
-; CHECK: declare void @__kmpc_free_shared(i8* allocptr nocapture, i64)
-declare void @__kmpc_free_shared(i8*, i64)
+; CHECK: declare void @__kmpc_free_shared(ptr allocptr nocapture, i64)
+declare void @__kmpc_free_shared(ptr, i64)
 
 declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 
@@ -250,7 +250,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 !2 = !{}
 !3 = !{i32 2, !"Debug Info Version", i32 3}
 !4 = !{i32 1, !"wchar_size", i32 4}
-!5 = !{void ()* @kernel, !"kernel", i32 1}
+!5 = !{ptr @kernel, !"kernel", i32 1}
 !6 = !{i32 7, !"openmp", i32 50}
 !7 = !{i32 7, !"openmp-device", i32 50}
 !8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
@@ -284,7 +284,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
+; CHECK: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
 ; CHECK: [[DBG8]] = !DILocation(line: 4, column: 2, scope: !9)
 ; CHECK: [[META9:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
 ; CHECK: [[META10:![0-9]+]] = !DISubroutineType(types: !2)
@@ -296,7 +296,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; CHECK-DISABLED: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK-DISABLED: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK-DISABLED: [[META7:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
+; CHECK-DISABLED: [[META7:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
 ; CHECK-DISABLED: [[DBG8]] = !DILocation(line: 4, column: 2, scope: !9)
 ; CHECK-DISABLED: [[META9:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
 ; CHECK-DISABLED: [[META10:![0-9]+]] = !DISubroutineType(types: !2)
diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
index 8344f36504772b..102c1161800208 100644
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -13,11 +13,11 @@ target triple = "nvptx64"
 ; CHECK-LIMIT: remark: replace_globalization.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization
 ; UTC_ARGS: --enable
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
-@S = external local_unnamed_addr global i8*
+@S = external local_unnamed_addr global ptr
 @0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @foo_exec_mode = weak constant i8 1
 @bar_exec_mode = weak constant i8 1
 @baz_spmd_exec_mode = weak constant i8 2
@@ -25,79 +25,70 @@ target triple = "nvptx64"
 
 define dso_local void @foo() "kernel" {
 entry:
-  %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
-  %x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
+  %c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
+  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
   call void @unknown_no_openmp()
-  %x_on_stack = bitcast i8* %x to i32*
-  %0 = bitcast i32* %x_on_stack to i8*
-  call void @use(i8* %0)
-  call void @__kmpc_free_shared(i8* %x, i64 4)
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  call void @use(ptr %x)
+  call void @__kmpc_free_shared(ptr %x, i64 4)
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 }
 
 define void @bar() "kernel" {
-  %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   call void @unknown_no_openmp()
   %cmp = icmp eq i32 %c, -1
   br i1 %cmp, label %master1, label %exit
 master1:
-  %x = call align 4 i8* @__kmpc_alloc_shared(i64 16), !dbg !11
-  %x_on_stack = bitcast i8* %x to [4 x i32]*
-  %a0 = bitcast [4 x i32]* %x_on_stack to i8*
-  call void @use(i8* %a0)
-  call void @__kmpc_free_shared(i8* %x, i64 16)
+  %x = call align 4 ptr @__kmpc_alloc_shared(i64 16), !dbg !11
+  call void @use(ptr %x)
+  call void @__kmpc_free_shared(ptr %x, i64 16)
   br label %next
 next:
   call void @unknown_no_openmp()
   %b0 = icmp eq i32 %c, -1
   br i1 %b0, label %master2, label %exit
 master2:
-  %y = call align 4 i8* @__kmpc_alloc_shared(i64 4), !dbg !12
-  %y_on_stack = bitcast i8* %y to [4 x i32]*
-  %b1 = bitcast [4 x i32]* %y_on_stack to i8*
-  call void @use(i8* %b1)
-  call void @__kmpc_free_shared(i8* %y, i64 4)
+  %y = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !12
+  call void @use(ptr %y)
+  call void @__kmpc_free_shared(ptr %y, i64 4)
   br label %exit
 exit:
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 }
 
 define void @baz_spmd() "kernel" {
-  %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true)
+  %c = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 true)
   call void @unknown_no_openmp()
   %c0 = icmp eq i32 %c, -1
   br i1 %c0, label %master3, label %exit
 master3:
-  %z = call align 4 i8* @__kmpc_alloc_shared(i64 24), !dbg !12
-  %z_on_stack = bitcast i8* %z to [6 x i32]*
-  %c1 = bitcast [6 x i32]* %z_on_stack to i8*
-  call void @use(i8* %c1)
-  call void @__kmpc_free_shared(i8* %z, i64 24)
+  %z = call align 4 ptr @__kmpc_alloc_shared(i64 24), !dbg !12
+  call void @use(ptr %z)
+  call void @__kmpc_free_shared(ptr %z, i64 24)
   br label %exit
 exit:
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2)
+  call void @__kmpc_target_deinit(ptr @1, i8 2)
   ret void
 }
 
-define void @use(i8* %x) {
+define void @use(ptr %x) {
 entry:
-  store i8* %x, i8** @S
+  store ptr %x, ptr @S
   ret void
 }
 
 @offset =global i32 undef
 @stack = internal addrspace(3) global [1024 x i8] undef
-define private i8* @__kmpc_alloc_shared(i64) {
-  %bc = bitcast [1024 x i8] addrspace(3) * @stack to i8 addrspace(3) *
-  %ac = addrspacecast i8 addrspace(3) * %bc to i8*
-  %l = load i32, i32* @offset
-  %gep = getelementptr i8, i8* %ac, i32 %l
-  ret i8* %gep
+define private ptr @__kmpc_alloc_shared(i64) {
+  %ac = addrspacecast ptr addrspace(3) @stack to ptr
+  %l = load i32, ptr @offset
+  %gep = getelementptr i8, ptr %ac, i32 %l
+  ret ptr %gep
 }
 
-declare void @__kmpc_free_shared(i8*, i64)
+declare void @__kmpc_free_shared(ptr, i64)
 
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 
@@ -106,11 +97,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
 
 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
   ret i32 0
 }
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 
@@ -125,17 +116,17 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 !4 = !{i32 1, !"wchar_size", i32 4}
 !5 = !{i32 7, !"openmp", i32 50}
 !6 = !{i32 7, !"openmp-device", i32 50}
-!7 = !{void ()* @foo, !"kernel", i32 1}
-!8 = !{void ()* @bar, !"kernel", i32 1}
-!13 = !{void ()* @baz_spmd, !"kernel", i32 1}
+!7 = !{ptr @foo, !"kernel", i32 1}
+!8 = !{ptr @bar, !"kernel", i32 1}
+!13 = !{ptr @baz_spmd, !"kernel", i32 1}
 !9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
 !10 = !DISubroutineType(types: !2)
 !11 = !DILocation(line: 5, column: 7, scope: !9)
 !12 = !DILocation(line: 5, column: 14, scope: !9)
 ;.
-; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
+; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c"
-; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK: @[[FOO_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; CHECK: @[[BAR_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; CHECK: @[[BAZ_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
@@ -147,77 +138,77 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK-LABEL: define {{[^@]+}}@foo
 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
-; CHECK-NEXT:    [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
+; CHECK-NEXT:    [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    call void @unknown_no_openmp()
-; CHECK-NEXT:    call void @use.internalized(i8* nofree [[X]]) #[[ATTR6]]
-; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR6]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; CHECK-NEXT:    call void @use.internalized(ptr nofree [[X]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR6]]
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; CHECK-NEXT:    ret void
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bar
 ; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; CHECK-NEXT:    call void @unknown_no_openmp()
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C]], -1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
 ; CHECK:       master1:
-; CHECK-NEXT:    call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
+; CHECK-NEXT:    call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR6]]
 ; CHECK-NEXT:    br label [[NEXT:%.*]]
 ; CHECK:       next:
 ; CHECK-NEXT:    call void @unknown_no_openmp()
 ; CHECK-NEXT:    [[B0:%.*]] = icmp eq i32 [[C]], -1
 ; CHECK-NEXT:    br i1 [[B0]], label [[MASTER2:%.*]], label [[EXIT]]
 ; CHECK:       master2:
-; CHECK-NEXT:    call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
+; CHECK-NEXT:    call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR6]]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; CHECK-NEXT:    ret void
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@baz_spmd
 ; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
+; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 true)
 ; CHECK-NEXT:    call void @unknown_no_openmp()
 ; CHECK-NEXT:    [[C0:%.*]] = icmp eq i32 [[C]], -1
 ; CHECK-NEXT:    br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
 ; CHECK:       master3:
-; CHECK-NEXT:    [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]]
-; CHECK-NEXT:    call void @use.internalized(i8* nofree [[Z]]) #[[ATTR6]]
-; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR6]]
+; CHECK-NEXT:    [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]]
+; CHECK-NEXT:    call void @use.internalized(ptr nofree [[Z]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR6]]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; CHECK-NEXT:    ret void
 ;
 ;
 ; CHECK: Function Attrs: nofree norecurse nounwind memory(write)
 ; CHECK-LABEL: define {{[^@]+}}@use.internalized
-; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i8* [[X]], i8** @S, align 8
+; CHECK-NEXT:    store ptr [[X]], ptr @S, align 8
 ; CHECK-NEXT:    ret void
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@use
-; CHECK-SAME: (i8* [[X:%.*]]) {
+; CHECK-SAME: (ptr [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i8* [[X]], i8** @S, align 8
+; CHECK-NEXT:    store ptr [[X]], ptr @S, align 8
 ; CHECK-NEXT:    ret void
 ;
 ;
 ; CHECK: Function Attrs: nosync nounwind allocsize(0) memory(read)
 ; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared
 ; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT:    [[L:%.*]] = load i32, i32* @offset, align 4
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([1024 x i8], [1024 x i8] addrspace(3)* @stack, i32 0, i32 0) to i8*), i32 [[L]]
-; CHECK-NEXT:    ret i8* [[GEP]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr @offset, align 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr addrspacecast (ptr addrspace(3) @stack to ptr), i32 [[L]]
+; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; CHECK-NEXT:    ret i32 0
 ;
 ;.
@@ -236,9 +227,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{void ()* @foo, !"kernel", i32 1}
-; CHECK: [[META8:![0-9]+]] = !{void ()* @bar, !"kernel", i32 1}
-; CHECK: [[META9:![0-9]+]] = !{void ()* @baz_spmd, !"kernel", i32 1}
+; CHECK: [[META7:![0-9]+]] = !{ptr @foo, !"kernel", i32 1}
+; CHECK: [[META8:![0-9]+]] = !{ptr @bar, !"kernel", i32 1}
+; CHECK: [[META9:![0-9]+]] = !{ptr @baz_spmd, !"kernel", i32 1}
 ; CHECK: [[DBG10]] = !DILocation(line: 5, column: 14, scope: !11)
 ; CHECK: [[META11:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !12, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
 ; CHECK: [[META12:![0-9]+]] = !DISubroutineType(types: !2)
diff --git a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll
index 94ae5d798be460..b3e760f5c500bb 100644
--- a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll
+++ b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll
@@ -4,45 +4,45 @@
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
-@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8
+@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 322, i32 0, i32 0, ptr @.str }, align 8
 
 define i32 @main() {
 entry:
 
-  call void (%struct.ident_t*, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+  call void (ptr, ptr, ...) @__kmpc_fork_call(ptr nonnull @0, ptr @.omp_outlined.)
   ret i32 0
 }
 
 ; Only the last runtime call will be matched due that the rest of the "runtime function" calls
 ; have some type mismatch compared to the real runtime function. See the check at bottom.
-define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 entry:
 
-  call void @__kmpc_master(%struct.ident_t* nonnull @0)
-  call void @__kmpc_end_master(%struct.ident_t* nonnull @0, i32 0, i32 0)
-  call void @__kmpc_barrier(%struct.ident_t* nonnull @1, float 0.0)
+  call void @__kmpc_master(ptr nonnull @0)
+  call void @__kmpc_end_master(ptr nonnull @0, i32 0, i32 0)
+  call void @__kmpc_barrier(ptr nonnull @1, float 0.0)
   call void @omp_get_thread_num()
-  call void @__kmpc_flush(%struct.ident_t* nonnull @0)
+  call void @__kmpc_flush(ptr nonnull @0)
   ret void
 }
 ; Fewer arguments than expected in variadic function.
-declare !callback !2 void @__kmpc_fork_call(%struct.ident_t*, void (i32*, i32*, ...)*, ...)
+declare !callback !2 void @__kmpc_fork_call(ptr, ptr, ...)
 
 ; Fewer number of arguments in non variadic function.
-declare void @__kmpc_master(%struct.ident_t*)
+declare void @__kmpc_master(ptr)
 
 ; Bigger number of arguments in non variadic function.
-declare void @__kmpc_end_master(%struct.ident_t*, i32, i32)
+declare void @__kmpc_end_master(ptr, i32, i32)
 
 ; Different argument type than the expected.
-declare void @__kmpc_barrier(%struct.ident_t*, float)
+declare void @__kmpc_barrier(ptr, float)
 
 ; Proper use of runtime function.
-declare void @__kmpc_flush(%struct.ident_t*)
+declare void @__kmpc_flush(ptr)
 
 ; Different return type.
 declare void @omp_get_thread_num()
diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
index 4edff601bcc252..16ccd2d726d364 100644
--- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
+++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
@@ -3,10 +3,10 @@
 ; REQUIRES: asserts
 ; ModuleID = 'single_threaded_exeuction.c'
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [1 x i8] c"\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @kernel_exec_mode = weak constant i8 1
 
 
@@ -15,7 +15,7 @@
 ; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread.
 ; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread.
 define void @kernel() {
-  %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false)
+  %call = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 false)
   %cmp = icmp eq i32 %call, -1
   br i1 %cmp, label %if.then, label %if.else
 if.then:
@@ -23,7 +23,7 @@ if.then:
 if.else:
   br label %if.end
 if.end:
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1)
+  call void @__kmpc_target_deinit(ptr null, i8 1)
   ret void
 }
 
@@ -104,11 +104,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 
 declare i32 @llvm.amdgcn.workitem.id.x()
 
-declare void @__kmpc_kernel_prepare_parallel(i8*)
+declare void @__kmpc_kernel_prepare_parallel(ptr)
 
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1)
+declare i32 @__kmpc_target_init(ptr, i8, i1)
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 attributes #0 = { cold noinline }
 
@@ -123,7 +123,7 @@ attributes #0 = { cold noinline }
 !4 = !{i32 1, !"wchar_size", i32 4}
 !5 = !{i32 7, !"openmp", i32 50}
 !6 = !{i32 7, !"openmp-device", i32 50}
-!7 = !{void ()* @kernel, !"kernel", i32 1}
+!7 = !{ptr @kernel, !"kernel", i32 1}
 !8 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
 !9 = distinct !DISubprogram(name: "cold", scope: !1, file: !1, line: 8, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
 !10 = !DISubroutineType(types: !2)
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index a088755d734b69..daa77cc3fdafb9 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -86,61 +86,61 @@
 ;;     }
 ;; }
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 %struct.kmp_task_t_with_privates = type { %struct.kmp_task_t }
-%struct.kmp_task_t = type { i8*, i32 (i32, i8*)*, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t }
-%union.kmp_cmplrdata_t = type { i32 (i32, i8*)* }
+%struct.kmp_task_t = type { ptr, ptr, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t }
+%union.kmp_cmplrdata_t = type { ptr }
 %struct.anon = type {}
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode = weak constant i8 1
 @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1
 @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1
 @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode = weak constant i8 1
 @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode = weak constant i8 1
 @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode = weak constant i8 1
-@llvm.compiler.used = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+@llvm.compiler.used = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
 ;.
 ; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
-; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
 ; AMDGPU: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
 ; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
-; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
 ; NVPTX: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
 ; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
 ; AMDGPU-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU-DISABLED: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -150,14 +150,14 @@
 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
 ; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
 ; NVPTX-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX-DISABLED: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -197,16 +197,16 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; AMDGPU:       common.ret:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; AMDGPU-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; AMDGPU-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; AMDGPU-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
@@ -214,25 +214,25 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; NVPTX:       common.ret:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; NVPTX-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; NVPTX-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; NVPTX-NEXT:    br label [[COMMON_RET]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
 ; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       is_worker_check:
@@ -242,19 +242,19 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.finished:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker_state_machine.is_active.check:
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
@@ -266,7 +266,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU-DISABLED:       worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU-DISABLED:       thread.user_code.check:
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -274,19 +274,19 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
 ; AMDGPU-DISABLED:       common.ret:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
 ; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX-DISABLED:       is_worker_check:
@@ -296,18 +296,18 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.finished:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker_state_machine.is_active.check:
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*)
+; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.execute:
 ; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
@@ -319,7 +319,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX-DISABLED:       worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX-DISABLED:       thread.user_code.check:
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -327,16 +327,16 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
 ; NVPTX-DISABLED:       common.ret:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
+; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -344,20 +344,20 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 0, i32* %.zero.addr, align 4
-  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
-  call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #6
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 0, ptr %.zero.addr, align 4
+  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
+  call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #6
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   br label %common.ret
 }
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
 ; AMDGPU:       for.cond:
 ; AMDGPU-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -367,16 +367,15 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       for.body:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; AMDGPU-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT:    br label [[FOR_COND:%.*]]
 ; NVPTX:       for.cond:
 ; NVPTX-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -386,16 +385,15 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       for.body:
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; NVPTX-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; AMDGPU-DISABLED:       for.cond:
 ; AMDGPU-DISABLED-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -405,16 +403,15 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       for.body:
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; NVPTX-DISABLED:       for.cond:
 ; NVPTX-DISABLED-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -424,14 +421,13 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7:[0-9]+]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       for.body:
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
 ;
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body, %entry
@@ -444,35 +440,34 @@ for.cond.cleanup:                                 ; preds = %for.cond
   ret void
 
 for.body:                                         ; preds = %for.cond
-  %0 = load i32, i32* %.global_tid., align 4, !tbaa !18
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %1, i64 0)
+  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
+  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0)
   %inc = add nsw i32 %i.0, 1
   br label %for.cond, !llvm.loop !22
 }
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    call void @unknown() #[[ATTR8:[0-9]+]]
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -489,9 +484,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
@@ -499,9 +494,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
@@ -509,9 +504,9 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
@@ -519,19 +514,19 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 entry:
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 %1, i32* %.addr1, align 4, !tbaa !18
-  store i32 0, i32* %.zero.addr, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr) #6
+  %global_args = alloca ptr, align 8
+  store i32 %1, ptr %.addr1, align 4, !tbaa !18
+  store i32 0, ptr %.zero.addr, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr) #6
   ret void
 }
 
@@ -542,16 +537,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; AMDGPU:       common.ret:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT:    call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; AMDGPU-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
@@ -559,25 +554,25 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; NVPTX:       common.ret:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT:    call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; NVPTX-NEXT:    br label [[COMMON_RET]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       is_worker_check:
@@ -587,19 +582,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.finished:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker_state_machine.is_active.check:
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
@@ -611,7 +606,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU-DISABLED:       worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU-DISABLED:       thread.user_code.check:
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -619,19 +614,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; AMDGPU-DISABLED:       common.ret:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX-DISABLED:       is_worker_check:
@@ -641,18 +636,18 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.finished:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker_state_machine.is_active.check:
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*)
+; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.execute:
 ; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
@@ -664,7 +659,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX-DISABLED:       worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX-DISABLED:       thread.user_code.check:
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -672,16 +667,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
 ; NVPTX-DISABLED:       common.ret:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -689,24 +684,23 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 0, i32* %.zero.addr, align 4
-  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
-  call void @__omp_outlined__2(i32* %.threadid_temp., i32* %.zero.addr) #6
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 0, ptr %.zero.addr, align 4
+  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
+  call void @__omp_outlined__2(ptr %.threadid_temp., ptr %.zero.addr) #6
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   br label %common.ret
 }
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[X_H2S]] to i8*
-; AMDGPU-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32*
-; AMDGPU-NEXT:    call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
+; AMDGPU-NEXT:    call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]]
 ; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
 ; AMDGPU:       for.cond:
 ; AMDGPU-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -716,19 +710,17 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       for.body:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP1]], i64 0)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; AMDGPU-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* [[X_H2S]] to i32*
-; NVPTX-NEXT:    call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]]
 ; NVPTX-NEXT:    br label [[FOR_COND:%.*]]
 ; NVPTX:       for.cond:
 ; NVPTX-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -738,20 +730,18 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       for.body:
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP1]], i64 0)
+; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; NVPTX-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[X_H2S]] to i8*
-; AMDGPU-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32*
-; AMDGPU-DISABLED-NEXT:    call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
+; AMDGPU-DISABLED-NEXT:    call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]]
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; AMDGPU-DISABLED:       for.cond:
 ; AMDGPU-DISABLED-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -761,19 +751,17 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       for.body:
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP1]], i64 0)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; AMDGPU-DISABLED-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[X_H2S:%.*]] = alloca i8, i64 4, align 4
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* [[X_H2S]] to i32*
-; NVPTX-DISABLED-NEXT:    call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]]
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; NVPTX-DISABLED:       for.cond:
 ; NVPTX-DISABLED-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -783,17 +771,15 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       for.body:
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP1]], i64 0)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; NVPTX-DISABLED-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
 ;
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
-  %x_on_stack = bitcast i8* %x to i32*
-  call void @use(i32* nocapture %x_on_stack) #10
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
+  call void @use(ptr nocapture %x) #10
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body, %entry
@@ -803,38 +789,37 @@ for.cond:                                         ; preds = %for.body, %entry
 
 for.cond.cleanup:                                 ; preds = %for.cond
   call void @spmd_amenable() #10
-  call void @__kmpc_free_shared(i8* %x, i64 4)
+  call void @__kmpc_free_shared(ptr %x, i64 4)
   ret void
 
 for.body:                                         ; preds = %for.cond
-  %0 = load i32, i32* %.global_tid., align 4, !tbaa !18
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %1, i64 0)
+  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
+  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0)
   %inc = add nsw i32 %i.0, 1
   br label %for.cond, !llvm.loop !25
 }
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    call void @unknown() #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    call void @unknown() #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    call void @unknown() #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    call void @unknown() #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -851,9 +836,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
@@ -861,9 +846,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
@@ -871,9 +856,9 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
@@ -881,19 +866,19 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 entry:
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 %1, i32* %.addr1, align 4, !tbaa !18
-  store i32 0, i32* %.zero.addr, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #6
+  %global_args = alloca ptr, align 8
+  store i32 %1, ptr %.addr1, align 4, !tbaa !18
+  store i32 0, ptr %.zero.addr, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #6
   ret void
 }
 
@@ -905,16 +890,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; AMDGPU:       common.ret:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; AMDGPU-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
@@ -922,25 +907,25 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; NVPTX:       common.ret:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; NVPTX-NEXT:    br label [[COMMON_RET]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       is_worker_check:
@@ -950,19 +935,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.finished:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker_state_machine.is_active.check:
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
@@ -974,7 +959,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU-DISABLED:       worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU-DISABLED:       thread.user_code.check:
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -982,19 +967,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; AMDGPU-DISABLED:       common.ret:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX-DISABLED:       is_worker_check:
@@ -1004,18 +989,18 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.finished:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker_state_machine.is_active.check:
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*)
+; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.execute:
 ; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
@@ -1027,7 +1012,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX-DISABLED:       worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX-DISABLED:       thread.user_code.check:
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -1035,16 +1020,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
 ; NVPTX-DISABLED:       common.ret:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -1052,20 +1037,20 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 0, i32* %.zero.addr, align 4
-  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
-  call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #6
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 0, ptr %.zero.addr, align 4
+  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
+  call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #6
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   br label %common.ret
 }
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
 ; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
 ; AMDGPU:       for.cond:
 ; AMDGPU-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1075,18 +1060,16 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       for.body:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1)
+; AMDGPU-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
 ; AMDGPU-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; AMDGPU-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
 ; NVPTX-NEXT:    br label [[FOR_COND:%.*]]
 ; NVPTX:       for.cond:
 ; NVPTX-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1096,18 +1079,16 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       for.body:
-; NVPTX-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1)
+; NVPTX-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
 ; NVPTX-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; NVPTX-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; AMDGPU-DISABLED:       for.cond:
 ; AMDGPU-DISABLED-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1117,18 +1098,16 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       for.body:
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1)
+; AMDGPU-DISABLED-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
 ; AMDGPU-DISABLED-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; NVPTX-DISABLED:       for.cond:
 ; NVPTX-DISABLED-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1138,18 +1117,15 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       for.body:
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1)
+; NVPTX-DISABLED-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
 ; NVPTX-DISABLED-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
 ;
 entry:
-  %captured_vars_addrs = alloca [1 x i8*], align 8
-  %x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
-  %x_on_stack = bitcast i8* %x to i32*
+  %captured_vars_addrs = alloca [1 x ptr], align 8
+  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body, %entry
@@ -1159,61 +1135,59 @@ for.cond:                                         ; preds = %for.body, %entry
 
 for.cond.cleanup:                                 ; preds = %for.cond
   call void @spmd_amenable() #10
-  call void @__kmpc_free_shared(i8* %x, i64 4)
+  call void @__kmpc_free_shared(ptr %x, i64 4)
   ret void
 
 for.body:                                         ; preds = %for.cond
-  %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0
-  store i8* %x, i8** %0, align 8, !tbaa !26
-  %1 = load i32, i32* %.global_tid., align 4, !tbaa !18
-  %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 1)
+  store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
+  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
+  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 1)
   %inc = add nsw i32 %i.0, 1
   br label %for.cond, !llvm.loop !28
 }
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 {
+define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT:    store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-NEXT:    call void @unknown() #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT:    store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-NEXT:    call void @unknown() #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED-NEXT:    store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    call void @unknown() #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED-NEXT:    store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    call void @unknown() #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %x, align 4, !tbaa !18
+  %0 = load i32, ptr %x, align 4, !tbaa !18
   %inc = add nsw i32 %0, 1
-  store i32 %inc, i32* %x, align 4, !tbaa !18
+  store i32 %inc, ptr %x, align 4, !tbaa !18
   call void @unknown() #11
   ret void
 }
@@ -1225,12 +1199,11 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
-; AMDGPU-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
@@ -1238,12 +1211,11 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
-; NVPTX-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
-; NVPTX-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
@@ -1251,12 +1223,11 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
-; AMDGPU-DISABLED-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
@@ -1264,25 +1235,23 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
-; NVPTX-DISABLED-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 entry:
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 %1, i32* %.addr1, align 4, !tbaa !18
-  store i32 0, i32* %.zero.addr, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  %2 = load i8**, i8*** %global_args, align 8
-  %3 = bitcast i8** %2 to i32**
-  %4 = load i32*, i32** %3, align 8, !tbaa !26
-  call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr, i32* %4) #6
+  %global_args = alloca ptr, align 8
+  store i32 %1, ptr %.addr1, align 4, !tbaa !18
+  store i32 0, ptr %.zero.addr, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  %2 = load ptr, ptr %global_args, align 8
+  %3 = load ptr, ptr %2, align 8, !tbaa !26
+  call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
   ret void
 }
 
@@ -1293,16 +1262,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; AMDGPU:       common.ret:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; AMDGPU-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
@@ -1310,25 +1279,25 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; NVPTX:       common.ret:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; NVPTX-NEXT:    br label [[COMMON_RET]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       is_worker_check:
@@ -1338,19 +1307,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.finished:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker_state_machine.is_active.check:
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
@@ -1362,7 +1331,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU-DISABLED:       worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU-DISABLED:       thread.user_code.check:
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -1370,19 +1339,19 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; AMDGPU-DISABLED:       common.ret:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX-DISABLED:       is_worker_check:
@@ -1392,18 +1361,18 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.finished:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker_state_machine.is_active.check:
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*)
+; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.execute:
 ; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
@@ -1415,7 +1384,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX-DISABLED:       worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX-DISABLED:       thread.user_code.check:
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -1423,16 +1392,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
 ; NVPTX-DISABLED:       common.ret:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -1440,33 +1409,32 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 0, i32* %.zero.addr, align 4
-  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
-  call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #6
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 0, ptr %.zero.addr, align 4
+  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
+  call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #6
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   br label %common.ret
 }
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; AMDGPU-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32*
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
 ; AMDGPU-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; AMDGPU:       region.check.tid:
 ; AMDGPU-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
 ; AMDGPU-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
 ; AMDGPU-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; AMDGPU:       region.guarded:
-; AMDGPU-NEXT:    store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
 ; AMDGPU-NEXT:    br label [[REGION_GUARDED_END:%.*]]
 ; AMDGPU:       region.guarded.end:
 ; AMDGPU-NEXT:    br label [[REGION_BARRIER]]
 ; AMDGPU:       region.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[REGION_EXIT:%.*]]
 ; AMDGPU:       region.exit:
 ; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
@@ -1478,31 +1446,28 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       for.body:
-; AMDGPU-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT:    [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1)
+; AMDGPU-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
 ; AMDGPU-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; AMDGPU-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; NVPTX-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32*
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
 ; NVPTX-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; NVPTX:       region.check.tid:
 ; NVPTX-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
 ; NVPTX-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
 ; NVPTX-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; NVPTX:       region.guarded:
-; NVPTX-NEXT:    store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
 ; NVPTX-NEXT:    br label [[REGION_GUARDED_END:%.*]]
 ; NVPTX:       region.guarded.end:
 ; NVPTX-NEXT:    br label [[REGION_BARRIER]]
 ; NVPTX:       region.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[REGION_EXIT:%.*]]
 ; NVPTX:       region.exit:
 ; NVPTX-NEXT:    br label [[FOR_COND:%.*]]
@@ -1514,20 +1479,17 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       for.body:
-; NVPTX-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
-; NVPTX-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT:    [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1)
+; NVPTX-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
+; NVPTX-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
 ; NVPTX-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; NVPTX-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32*
-; AMDGPU-DISABLED-NEXT:    store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; AMDGPU-DISABLED:       for.cond:
 ; AMDGPU-DISABLED-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1537,20 +1499,17 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       for.body:
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1)
+; AMDGPU-DISABLED-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
 ; AMDGPU-DISABLED-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32*
-; NVPTX-DISABLED-NEXT:    store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; NVPTX-DISABLED:       for.cond:
 ; NVPTX-DISABLED-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1560,19 +1519,16 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       for.body:
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1)
+; NVPTX-DISABLED-NEXT:    store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
 ; NVPTX-DISABLED-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
 ;
 entry:
-  %captured_vars_addrs = alloca [1 x i8*], align 8
-  %x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
-  %x_on_stack = bitcast i8* %x to i32*
-  store i32 42, i32* %x_on_stack, align 4, !tbaa !18
+  %captured_vars_addrs = alloca [1 x ptr], align 8
+  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
+  store i32 42, ptr %x, align 4, !tbaa !18
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body, %entry
@@ -1582,62 +1538,60 @@ for.cond:                                         ; preds = %for.body, %entry
 
 for.cond.cleanup:                                 ; preds = %for.cond
   call void @spmd_amenable() #10
-  call void @__kmpc_free_shared(i8* %x, i64 4)
+  call void @__kmpc_free_shared(ptr %x, i64 4)
   ret void
 
 for.body:                                         ; preds = %for.cond
-  %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0
-  store i8* %x, i8** %0, align 8, !tbaa !26
-  %1 = load i32, i32* %.global_tid., align 4, !tbaa !18
-  %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 1)
+  store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
+  %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
+  call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 1)
   %inc = add nsw i32 %i.0, 1
   br label %for.cond, !llvm.loop !29
 }
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 {
+define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-NEXT:    store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-NEXT:    call void @unknowni32p(i32* [[X]]) #[[ATTR8]]
+; AMDGPU-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-NEXT:    store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-NEXT:    call void @unknowni32p(i32* [[X]]) #[[ATTR8]]
+; NVPTX-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED-NEXT:    store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT:    call void @unknowni32p(i32* [[X]]) #[[ATTR8]]
+; AMDGPU-DISABLED-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; AMDGPU-DISABLED-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED-NEXT:    store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT:    call void @unknowni32p(i32* [[X]]) #[[ATTR8]]
+; NVPTX-DISABLED-NEXT:    store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
+; NVPTX-DISABLED-NEXT:    call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %x, align 4, !tbaa !18
+  %0 = load i32, ptr %x, align 4, !tbaa !18
   %inc = add nsw i32 %0, 1
-  store i32 %inc, i32* %x, align 4, !tbaa !18
-  call void @unknowni32p(i32* %x) #11
+  store i32 %inc, ptr %x, align 4, !tbaa !18
+  call void @unknowni32p(ptr %x) #11
   ret void
 }
 
@@ -1648,12 +1602,11 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
-; AMDGPU-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
@@ -1661,12 +1614,11 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
-; NVPTX-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
-; NVPTX-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
@@ -1674,12 +1626,11 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
-; AMDGPU-DISABLED-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
@@ -1687,25 +1638,23 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
-; NVPTX-DISABLED-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 entry:
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 %1, i32* %.addr1, align 4, !tbaa !18
-  store i32 0, i32* %.zero.addr, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  %2 = load i8**, i8*** %global_args, align 8
-  %3 = bitcast i8** %2 to i32**
-  %4 = load i32*, i32** %3, align 8, !tbaa !26
-  call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr, i32* %4) #6
+  %global_args = alloca ptr, align 8
+  store i32 %1, ptr %.addr1, align 4, !tbaa !18
+  store i32 0, ptr %.zero.addr, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  %2 = load ptr, ptr %global_args, align 8
+  %3 = load ptr, ptr %2, align 8, !tbaa !26
+  call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
   ret void
 }
 
@@ -1714,10 +1663,10 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
 ; AMDGPU-SAME: () #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU:       is_worker_check:
@@ -1727,12 +1676,12 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU:       worker_state_machine.begin:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU:       worker_state_machine.finished:
 ; AMDGPU-NEXT:    ret void
@@ -1745,7 +1694,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU:       worker_state_machine.done.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU:       thread.user_code.check:
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -1753,18 +1702,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; AMDGPU:       common.ret:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT:    call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
 ; NVPTX-SAME: () #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX:       is_worker_check:
@@ -1774,11 +1723,11 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX:       worker_state_machine.begin:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX:       worker_state_machine.finished:
 ; NVPTX-NEXT:    ret void
@@ -1791,7 +1740,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX:       worker_state_machine.done.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX:       thread.user_code.check:
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -1799,18 +1748,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; NVPTX:       common.ret:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT:    call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    br label [[COMMON_RET]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       is_worker_check:
@@ -1820,12 +1769,12 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.finished:
 ; AMDGPU-DISABLED-NEXT:    ret void
@@ -1838,7 +1787,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU-DISABLED:       worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU-DISABLED:       thread.user_code.check:
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -1846,18 +1795,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; AMDGPU-DISABLED:       common.ret:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX-DISABLED:       is_worker_check:
@@ -1867,11 +1816,11 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.finished:
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -1884,7 +1833,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX-DISABLED:       worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX-DISABLED:       thread.user_code.check:
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -1892,15 +1841,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
 ; NVPTX-DISABLED:       common.ret:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
   %.zero.addr = alloca i32, align 4
   %.threadid_temp. = alloca i32, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -1908,36 +1857,36 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 0, i32* %.zero.addr, align 4
-  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
-  call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #6
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  store i32 0, ptr %.zero.addr, align 4
+  store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
+  call void @__omp_outlined__8(ptr %.threadid_temp., ptr %.zero.addr) #6
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   br label %common.ret
 }
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    call void @unknown() #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    call void @unknown() #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    call void @unknown() #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    call void @unknown() #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -1952,9 +1901,9 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
 ; AMDGPU-SAME: () #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
-; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU:       is_worker_check:
@@ -1964,19 +1913,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; AMDGPU-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU:       worker_state_machine.begin:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU:       worker_state_machine.finished:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       worker_state_machine.is_active.check:
 ; AMDGPU-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.check:
-; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*)
+; AMDGPU-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
 ; AMDGPU-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; AMDGPU:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
@@ -1988,7 +1937,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; AMDGPU-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU:       worker_state_machine.done.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU:       thread.user_code.check:
 ; AMDGPU-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -1996,21 +1945,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; AMDGPU:       common.ret:
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       user_code.entry:
-; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT:    [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]]
-; AMDGPU-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates*
-; AMDGPU-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]]
-; AMDGPU-NEXT:    [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0)
-; AMDGPU-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
+; AMDGPU-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
+; AMDGPU-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
 ; NVPTX-SAME: () #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX:       is_worker_check:
@@ -2020,18 +1967,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; NVPTX-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX:       worker_state_machine.begin:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX:       worker_state_machine.finished:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       worker_state_machine.is_active.check:
 ; NVPTX-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.check:
-; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*)
+; NVPTX-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
 ; NVPTX-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; NVPTX:       worker_state_machine.parallel_region.execute:
 ; NVPTX-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
@@ -2043,7 +1990,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; NVPTX-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX:       worker_state_machine.done.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX:       thread.user_code.check:
 ; NVPTX-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -2051,21 +1998,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; NVPTX:       common.ret:
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       user_code.entry:
-; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT:    [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]]
-; NVPTX-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates*
-; NVPTX-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]]
-; NVPTX-NEXT:    [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0)
-; NVPTX-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
+; NVPTX-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
+; NVPTX-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-NEXT:    br label [[COMMON_RET]]
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
 ; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
-; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       is_worker_check:
@@ -2075,19 +2020,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; AMDGPU-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; AMDGPU-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.begin:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
+; AMDGPU-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; AMDGPU-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.finished:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       worker_state_machine.is_active.check:
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*)
+; AMDGPU-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
 ; AMDGPU-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; AMDGPU-DISABLED:       worker_state_machine.parallel_region.execute:
 ; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
@@ -2099,7 +2044,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; AMDGPU-DISABLED:       worker_state_machine.done.barrier:
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; AMDGPU-DISABLED:       thread.user_code.check:
 ; AMDGPU-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -2107,21 +2052,19 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; AMDGPU-DISABLED:       common.ret:
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       user_code.entry:
-; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates*
-; AMDGPU-DISABLED-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]]
-; AMDGPU-DISABLED-NEXT:    [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0)
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; AMDGPU-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
 ; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; NVPTX-DISABLED:       is_worker_check:
@@ -2131,18 +2074,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; NVPTX-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; NVPTX-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.begin:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; NVPTX-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.finished:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       worker_state_machine.is_active.check:
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*)
+; NVPTX-DISABLED-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
 ; NVPTX-DISABLED-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
 ; NVPTX-DISABLED:       worker_state_machine.parallel_region.execute:
 ; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
@@ -2154,7 +2097,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; NVPTX-DISABLED:       worker_state_machine.done.barrier:
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; NVPTX-DISABLED:       thread.user_code.check:
 ; NVPTX-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
@@ -2162,18 +2105,16 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
 ; NVPTX-DISABLED:       common.ret:
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       user_code.entry:
-; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates*
-; NVPTX-DISABLED-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]]
-; NVPTX-DISABLED-NEXT:    [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0)
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; NVPTX-DISABLED-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -2181,38 +2122,36 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  %2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 %1, i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*))
-  %3 = bitcast i8* %2 to %struct.kmp_task_t_with_privates*
-  %4 = call i32 @__kmpc_omp_task(%struct.ident_t* @1, i32 %1, i8* %2)
-  %5 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__9_wrapper to i8*), i8** %5, i64 0)
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
+  %2 = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %1, i32 1, i64 40, i64 0, ptr @"_omp_task_entry$")
+  %3 = call i32 @__kmpc_omp_task(ptr @1, i32 %1, ptr %2)
+  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs, i64 0)
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   br label %common.ret
 }
 
 ; Function Attrs: alwaysinline convergent nounwind
-define internal void @.omp_outlined.(i32 %.global_tid., i32* noalias %.part_id., i8* noalias %.privates., void (i8*, ...)* noalias %.copy_fn., i8* %.task_t., %struct.anon* noalias %__context) #9 {
+define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #9 {
 ; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined.
-; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined.
-; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
-; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
-; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
+; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR7]]
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -2223,100 +2162,96 @@ entry:
 }
 
 ; Function Attrs: convergent norecurse nounwind
-define internal i32 @"_omp_task_entry$"(i32 %0, %struct.kmp_task_t_with_privates* noalias %1) #3 {
+define internal i32 @"_omp_task_entry$"(i32 %0, ptr noalias %1) #3 {
 entry:
-  %2 = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* %1, i32 0, i32 0
-  %3 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 2
-  %4 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 0
-  %5 = load i8*, i8** %4, align 8, !tbaa !30
-  %6 = bitcast i8* %5 to %struct.anon*
-  %7 = bitcast %struct.kmp_task_t_with_privates* %1 to i8*
-  call void @.omp_outlined.(i32 %0, i32* %3, i8* null, void (i8*, ...)* null, i8* %7, %struct.anon* %6) #6
+  %2 = getelementptr inbounds %struct.kmp_task_t, ptr %1, i32 0, i32 2
+  %3 = load ptr, ptr %1, align 8, !tbaa !30
+  call void @.omp_outlined.(i32 %0, ptr %2, ptr null, ptr null, ptr %1, ptr %3) #6
   ret i32 0
 }
 
 ; Function Attrs: nounwind
-declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #6
+declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) #6
 
 ; Function Attrs: nounwind
-declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #6
+declare i32 @__kmpc_omp_task(ptr, i32, ptr) #6
 
 ; Function Attrs: nosync nounwind
-declare void @__kmpc_free_shared(i8* nocapture, i64) #8
+declare void @__kmpc_free_shared(ptr nocapture, i64) #8
 
 ; Function Attrs: nofree nosync nounwind
-declare i8* @__kmpc_alloc_shared(i64) #7
+declare ptr @__kmpc_alloc_shared(i64) #7
 
 ; Function Attrs: convergent
-declare void @use(i32* nocapture) #5
+declare void @use(ptr nocapture) #5
 
 ; Function Attrs: convergent
 declare void @unknown() #2
-declare void @unknowni32p(i32*) #2
+declare void @unknowni32p(ptr) #2
 
 ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
 
 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
 ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; AMDGPU-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; AMDGPU-NEXT:    ret i32 0
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; NVPTX-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; NVPTX-NEXT:    ret i32 0
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; AMDGPU-DISABLED-NEXT:    ret i32 0
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; NVPTX-DISABLED-NEXT:    ret i32 0
 ;
   ret i32 0
 }
 
-declare void @__kmpc_get_shared_variables(i8***)
+declare void @__kmpc_get_shared_variables(ptr)
 
 ; Function Attrs: alwaysinline
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) #4
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4
 
 ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
 
 ; Function Attrs: convergent
 declare void @spmd_amenable() #5
 
 ; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #6
+declare i32 @__kmpc_global_thread_num(ptr) #6
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
-define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    call void @unknown() #[[ATTR8]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    call void @unknown() #[[ATTR8]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    call void @unknown() #[[ATTR8]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    call void @unknown() #[[ATTR8]]
 ; NVPTX-DISABLED-NEXT:    ret void
@@ -2333,9 +2268,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-NEXT:    call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; AMDGPU-NEXT:    ret void
 ;
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
@@ -2343,9 +2278,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-NEXT:    call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; NVPTX-NEXT:    ret void
 ;
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
@@ -2353,9 +2288,9 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; AMDGPU-DISABLED-NEXT:    ret void
 ;
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
@@ -2363,19 +2298,19 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
 ; NVPTX-DISABLED-NEXT:    ret void
 ;
 entry:
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 %1, i32* %.addr1, align 4, !tbaa !18
-  store i32 0, i32* %.zero.addr, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__9(i32* %.addr1, i32* %.zero.addr) #6
+  %global_args = alloca ptr, align 8
+  store i32 %1, ptr %.addr1, align 4, !tbaa !18
+  store i32 0, ptr %.zero.addr, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__9(ptr %.addr1, ptr %.zero.addr) #6
   ret void
 }
 
@@ -2405,12 +2340,12 @@ attributes #11 = { convergent }
 !3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
 !4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
 !5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-!6 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-!7 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-!8 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-!9 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-!10 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-!11 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
+!6 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
+!7 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
+!8 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
+!9 = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
+!10 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
+!11 = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
 !12 = !{i32 1, !"wchar_size", i32 4}
 !13 = !{i32 7, !"openmp", i32 50}
 !14 = !{i32 7, !"openmp-device", i32 50}
@@ -2491,12 +2426,12 @@ attributes #11 = { convergent }
 ; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
 ; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
 ; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
+; AMDGPU: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
+; AMDGPU: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
+; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
+; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
+; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
+; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
 ; AMDGPU: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; AMDGPU: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
@@ -2525,12 +2460,12 @@ attributes #11 = { convergent }
 ; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
 ; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
 ; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
+; NVPTX: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
+; NVPTX: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
+; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
+; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
+; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
+; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
 ; NVPTX: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; NVPTX: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
@@ -2559,12 +2494,12 @@ attributes #11 = { convergent }
 ; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
 ; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
 ; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
 ; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
@@ -2593,12 +2528,12 @@ attributes #11 = { convergent }
 ; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
 ; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
 ; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
 ; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
index 92cb0a861132fd..d550ce201293de 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
@@ -12,33 +12,33 @@
 
 target triple = "nvptx64"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @__omp_offloading_fd02_404433c2_main_l5_exec_mode = weak constant i8 1
-@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
 
 ; Function Attrs: alwaysinline convergent norecurse nounwind
 ;.
 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
-; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
+; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
 ;.
-define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
+define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5
-; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]]
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; CHECK:       common.ret:
 ; CHECK-NEXT:    ret void
 ; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB1]]) #[[ATTR3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR3]]
 ; CHECK-NEXT:    [[CALL_I:%.*]] = call double @__nv_sin(double 0x400921FB54442D18) #[[ATTR7:[0-9]+]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; CHECK:       region.check.tid:
@@ -46,22 +46,21 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; CHECK:       region.guarded:
-; CHECK-NEXT:    store double [[CALL_I]], double* [[X]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT:    store double [[CALL_I]], ptr [[X]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
 ; CHECK:       region.guarded.end:
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]])
 ; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
 ; CHECK:       region.exit:
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull [[TMP4]], i64 0) #[[ATTR3]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR3]]
+; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 0) #[[ATTR3]]
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR3]]
 ; CHECK-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %common.ret
 
@@ -69,21 +68,20 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1)
+  %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
   %call.i = call double @__nv_sin(double 0x400921FB54442D18) #6
-  store double %call.i, double* %x, align 8, !tbaa !8
-  %2 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0
-  call void @__kmpc_parallel_51(%struct.ident_t* nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull %2, i64 0) #3
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1) #3
+  store double %call.i, ptr %x, align 8, !tbaa !8
+  call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3
+  call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3
   br label %common.ret
 }
 
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr
+declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
 
 ; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
-define internal void @__omp_outlined__(i32* noalias nocapture %.global_tid., i32* noalias nocapture %.bound_tid.) #1 {
+define internal void @__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid.) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
-; CHECK-SAME: (i32* noalias nocapture [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: (ptr noalias nocapture [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret void
 ;
@@ -96,25 +94,25 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
 ; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** nonnull [[GLOBAL_ARGS]]) #[[ATTR3]]
+; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %global_args = alloca i8**, align 8
-  call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3
+  %global_args = alloca ptr, align 8
+  call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3
   ret void
 }
 
-declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
+declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr
 
 ; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
+declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
 
 ; Function Attrs: alwaysinline
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr #4
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr #4
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
+declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
 
 ; Function Attrs: convergent
 declare double @__nv_sin(double) local_unnamed_addr #5
@@ -133,7 +131,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
 !llvm.ident = !{!7}
 
 !0 = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0}
-!1 = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
+!1 = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
 !2 = !{i32 1, !"wchar_size", i32 4}
 !3 = !{i32 7, !"openmp", i32 50}
 !4 = !{i32 7, !"openmp-device", i32 50}
@@ -155,7 +153,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
 ; CHECK: attributes #[[ATTR7]] = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
+; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
 ; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 68805e39190d0b..8d8b333a49a541 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -34,59 +34,59 @@
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode = weak constant i8 1
-@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
 
 ; Function Attrs: convergent norecurse nounwind
 ;.
 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
-; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
+; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
 ;.
 ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
+; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
 ; CHECK-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
-define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) #0 {
+define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2a_fbfa7a_sequential_loop_l6
-; CHECK-SAME: (i32* [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (ptr [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8
 ; CHECK-NEXT:    [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB1]]) #[[ATTR6]]
-; CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
+; CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl i64 [[N]], 32
 ; CHECK-NEXT:    [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
-; CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM_I]]
+; CHECK-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; CHECK:       region.check.tid:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; CHECK:       region.guarded:
-; CHECK-NEXT:    store i32 0, i32* [[X]], align 4, !noalias !8
-; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1_I]], align 4, !noalias !8
-; CHECK-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], i32* [[ARRAYIDX2_I]], align 4, !noalias !8
+; CHECK-NEXT:    store i32 0, ptr [[X]], align 4, !noalias !8
+; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias !8
+; CHECK-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias !8
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
 ; CHECK:       region.guarded.end:
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]])
 ; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
 ; CHECK:       region.exit:
-; CHECK-NEXT:    call void @usei8ptr(i8* nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
+; CHECK-NEXT:    call void @usei8ptr(ptr nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
 ; CHECK-NEXT:    br label [[FOR_COND_I:%.*]]
 ; CHECK:       for.cond.i:
 ; CHECK-NEXT:    [[I_0_I:%.*]] = phi i32 [ 2, [[REGION_EXIT]] ], [ [[INC_I:%.*]], [[REGION_EXIT3:%.*]] ]
@@ -96,91 +96,91 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
 ; CHECK:       for.body.i:
 ; CHECK-NEXT:    [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
 ; CHECK-NEXT:    [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
-; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM4_I]]
+; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID5:%.*]]
 ; CHECK:       region.check.tid5:
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
 ; CHECK:       region.guarded4:
-; CHECK-NEXT:    store i32 [[SUB3_I]], i32* [[ARRAYIDX5_I]], align 4, !noalias !8
+; CHECK-NEXT:    store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias !8
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END1:%.*]]
 ; CHECK:       region.guarded.end1:
 ; CHECK-NEXT:    br label [[REGION_BARRIER2]]
 ; CHECK:       region.barrier2:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP4]])
 ; CHECK-NEXT:    br label [[REGION_EXIT3]]
 ; CHECK:       region.exit3:
 ; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
 ; CHECK-NEXT:    br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       __omp_outlined__.exit:
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** null, i64 0)
-; CHECK-NEXT:    [[CALL_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias !8
+; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr null, i64 0)
+; CHECK-NEXT:    [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias !8
 ; CHECK-NEXT:    [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
-; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM6_I]]
+; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID10:%.*]]
 ; CHECK:       region.check.tid10:
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
 ; CHECK:       region.guarded9:
-; CHECK-NEXT:    store i32 [[CALL_I]], i32* [[ARRAYIDX7_I]], align 4, !noalias !8
+; CHECK-NEXT:    store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias !8
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END6:%.*]]
 ; CHECK:       region.guarded.end6:
 ; CHECK-NEXT:    br label [[REGION_BARRIER7]]
 ; CHECK:       region.barrier7:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
 ; CHECK-NEXT:    br label [[REGION_EXIT8:%.*]]
 ; CHECK:       region.exit8:
-; CHECK-NEXT:    [[CALL8_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-NEXT:    [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
 ; CHECK-NEXT:    [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
-; CHECK-NEXT:    [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM9_I]]
+; CHECK-NEXT:    [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID15:%.*]]
 ; CHECK:       region.check.tid15:
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
 ; CHECK:       region.guarded14:
-; CHECK-NEXT:    store i32 [[CALL8_I]], i32* [[ARRAYIDX10_I]], align 4, !noalias !8
+; CHECK-NEXT:    store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias !8
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END11:%.*]]
 ; CHECK:       region.guarded.end11:
 ; CHECK-NEXT:    br label [[REGION_BARRIER12]]
 ; CHECK:       region.barrier12:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]])
 ; CHECK-NEXT:    br label [[REGION_EXIT13:%.*]]
 ; CHECK:       region.exit13:
-; CHECK-NEXT:    [[CALL11_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-NEXT:    [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
 ; CHECK-NEXT:    [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
-; CHECK-NEXT:    [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM12_I]]
+; CHECK-NEXT:    [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
 ; CHECK-NEXT:    br label [[REGION_CHECK_TID20:%.*]]
 ; CHECK:       region.check.tid20:
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
 ; CHECK:       region.guarded19:
-; CHECK-NEXT:    store i32 [[CALL11_I]], i32* [[ARRAYIDX13_I]], align 4, !noalias !8
+; CHECK-NEXT:    store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias !8
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END16:%.*]]
 ; CHECK:       region.guarded.end16:
 ; CHECK-NEXT:    br label [[REGION_BARRIER17]]
 ; CHECK:       region.barrier17:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]])
 ; CHECK-NEXT:    br label [[REGION_EXIT18:%.*]]
 ; CHECK:       region.exit18:
-; CHECK-NEXT:    [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-NEXT:    [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-NEXT:    [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR6]]
+; CHECK-NEXT:    [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-NEXT:    [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-NEXT:    [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR6]]
 ; CHECK-NEXT:    ret void
 ; CHECK:       worker.exit:
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2a_fbfa7a_sequential_loop_l6
-; CHECK-DISABLED-SAME: (i32* [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-DISABLED-SAME: (ptr [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-DISABLED-NEXT:  entry:
 ; CHECK-DISABLED-NEXT:    [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8
-; CHECK-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
+; CHECK-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; CHECK-DISABLED-NEXT:    [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
-; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]]
+; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]]
 ; CHECK-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; CHECK-DISABLED-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; CHECK-DISABLED:       is_worker_check:
@@ -190,11 +190,11 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
 ; CHECK-DISABLED-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; CHECK-DISABLED-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; CHECK-DISABLED:       worker_state_machine.begin:
-; CHECK-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; CHECK-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; CHECK-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; CHECK-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; CHECK-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; CHECK-DISABLED-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; CHECK-DISABLED-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; CHECK-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; CHECK-DISABLED-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; CHECK-DISABLED-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; CHECK-DISABLED:       worker_state_machine.finished:
 ; CHECK-DISABLED-NEXT:    ret void
@@ -211,21 +211,21 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
 ; CHECK-DISABLED-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; CHECK-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; CHECK-DISABLED:       worker_state_machine.done.barrier:
-; CHECK-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; CHECK-DISABLED-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; CHECK-DISABLED-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; CHECK-DISABLED:       thread.user_code.check:
 ; CHECK-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK-DISABLED:       user_code.entry:
-; CHECK-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB1]]) #[[ATTR6]]
-; CHECK-DISABLED-NEXT:    store i32 0, i32* [[X]], align 4, !noalias !8
-; CHECK-DISABLED-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
-; CHECK-DISABLED-NEXT:    store i32 1, i32* [[ARRAYIDX1_I]], align 4, !noalias !8
+; CHECK-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
+; CHECK-DISABLED-NEXT:    store i32 0, ptr [[X]], align 4, !noalias !8
+; CHECK-DISABLED-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
+; CHECK-DISABLED-NEXT:    store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias !8
 ; CHECK-DISABLED-NEXT:    [[SEXT:%.*]] = shl i64 [[N]], 32
 ; CHECK-DISABLED-NEXT:    [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
-; CHECK-DISABLED-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM_I]]
-; CHECK-DISABLED-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], i32* [[ARRAYIDX2_I]], align 4, !noalias !8
-; CHECK-DISABLED-NEXT:    call void @usei8ptr(i8* nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
+; CHECK-DISABLED-NEXT:    [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
+; CHECK-DISABLED-NEXT:    store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias !8
+; CHECK-DISABLED-NEXT:    call void @usei8ptr(ptr nocapture [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
 ; CHECK-DISABLED-NEXT:    br label [[FOR_COND_I:%.*]]
 ; CHECK-DISABLED:       for.cond.i:
 ; CHECK-DISABLED-NEXT:    [[I_0_I:%.*]] = phi i32 [ 2, [[USER_CODE_ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ]
@@ -235,50 +235,50 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
 ; CHECK-DISABLED:       for.body.i:
 ; CHECK-DISABLED-NEXT:    [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
 ; CHECK-DISABLED-NEXT:    [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
-; CHECK-DISABLED-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM4_I]]
-; CHECK-DISABLED-NEXT:    store i32 [[SUB3_I]], i32* [[ARRAYIDX5_I]], align 4, !noalias !8
+; CHECK-DISABLED-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
+; CHECK-DISABLED-NEXT:    store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias !8
 ; CHECK-DISABLED-NEXT:    [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
 ; CHECK-DISABLED-NEXT:    br label [[FOR_COND_I]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-DISABLED:       __omp_outlined__.exit:
-; CHECK-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** null, i64 0)
-; CHECK-DISABLED-NEXT:    [[CALL_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias !8
+; CHECK-DISABLED-NEXT:    call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr null, i64 0)
+; CHECK-DISABLED-NEXT:    [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias !8
 ; CHECK-DISABLED-NEXT:    [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
-; CHECK-DISABLED-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM6_I]]
-; CHECK-DISABLED-NEXT:    store i32 [[CALL_I]], i32* [[ARRAYIDX7_I]], align 4, !noalias !8
-; CHECK-DISABLED-NEXT:    [[CALL8_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-DISABLED-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
+; CHECK-DISABLED-NEXT:    store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias !8
+; CHECK-DISABLED-NEXT:    [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
 ; CHECK-DISABLED-NEXT:    [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
-; CHECK-DISABLED-NEXT:    [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM9_I]]
-; CHECK-DISABLED-NEXT:    store i32 [[CALL8_I]], i32* [[ARRAYIDX10_I]], align 4, !noalias !8
-; CHECK-DISABLED-NEXT:    [[CALL11_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-DISABLED-NEXT:    [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
+; CHECK-DISABLED-NEXT:    store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias !8
+; CHECK-DISABLED-NEXT:    [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
 ; CHECK-DISABLED-NEXT:    [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
-; CHECK-DISABLED-NEXT:    [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM12_I]]
-; CHECK-DISABLED-NEXT:    store i32 [[CALL11_I]], i32* [[ARRAYIDX13_I]], align 4, !noalias !8
-; CHECK-DISABLED-NEXT:    [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-DISABLED-NEXT:    [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-DISABLED-NEXT:    [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 1) #[[ATTR6]]
+; CHECK-DISABLED-NEXT:    [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
+; CHECK-DISABLED-NEXT:    store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias !8
+; CHECK-DISABLED-NEXT:    [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-DISABLED-NEXT:    [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-DISABLED-NEXT:    [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
+; CHECK-DISABLED-NEXT:    call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 1) #[[ATTR6]]
 ; CHECK-DISABLED-NEXT:    ret void
 ; CHECK-DISABLED:       worker.exit:
 ; CHECK-DISABLED-NEXT:    ret void
 ;
 entry:
   %N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
+  %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1)
-  store i32 0, i32* %x, align 4, !noalias !8
-  %arrayidx1.i = getelementptr inbounds i32, i32* %x, i64 1
-  store i32 1, i32* %arrayidx1.i, align 4, !noalias !8
+  %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+  store i32 0, ptr %x, align 4, !noalias !8
+  %arrayidx1.i = getelementptr inbounds i32, ptr %x, i64 1
+  store i32 1, ptr %arrayidx1.i, align 4, !noalias !8
   %sext = shl i64 %N, 32
   %idxprom.i = ashr exact i64 %sext, 32
-  %arrayidx2.i = getelementptr inbounds i32, i32* %x, i64 %idxprom.i
-  store i32 %N.addr.sroa.0.0.extract.trunc, i32* %arrayidx2.i, align 4, !noalias !8
-  %heap2stack = call align 8 i8* @__kmpc_alloc_shared(i64 8)
-  call void @usei8ptr(i8* nocapture %heap2stack) willreturn nounwind
-  call void @__kmpc_free_shared(i8* %heap2stack, i64 8)
+  %arrayidx2.i = getelementptr inbounds i32, ptr %x, i64 %idxprom.i
+  store i32 %N.addr.sroa.0.0.extract.trunc, ptr %arrayidx2.i, align 4, !noalias !8
+  %heap2stack = call align 8 ptr @__kmpc_alloc_shared(i64 8)
+  call void @usei8ptr(ptr nocapture %heap2stack) willreturn nounwind
+  call void @__kmpc_free_shared(ptr %heap2stack, i64 8)
   br label %for.cond.i
 
 for.cond.i:                                       ; preds = %for.body.i, %user_code.entry
@@ -290,42 +290,42 @@ for.cond.i:                                       ; preds = %for.body.i, %user_c
 for.body.i:                                       ; preds = %for.cond.i
   %sub3.i = add nsw i32 %i.0.i, -1
   %idxprom4.i = zext i32 %i.0.i to i64
-  %arrayidx5.i = getelementptr inbounds i32, i32* %x, i64 %idxprom4.i
-  store i32 %sub3.i, i32* %arrayidx5.i, align 4, !noalias !8
+  %arrayidx5.i = getelementptr inbounds i32, ptr %x, i64 %idxprom4.i
+  store i32 %sub3.i, ptr %arrayidx5.i, align 4, !noalias !8
   %inc.i = add nuw nsw i32 %i.0.i, 1
   br label %for.cond.i, !llvm.loop !11
 
 __omp_outlined__.exit:                            ; preds = %for.cond.i
-  call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** null, i64 0)
-  %call.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
+  call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr null, i64 0)
+  %call.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
   %idxprom6.i = sext i32 %call.i to i64
-  %arrayidx7.i = getelementptr inbounds i32, i32* %x, i64 %idxprom6.i
-  store i32 %call.i, i32* %arrayidx7.i, align 4, !noalias !8
-  %call8.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
+  %arrayidx7.i = getelementptr inbounds i32, ptr %x, i64 %idxprom6.i
+  store i32 %call.i, ptr %arrayidx7.i, align 4, !noalias !8
+  %call8.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
   %idxprom9.i = sext i32 %call8.i to i64
-  %arrayidx10.i = getelementptr inbounds i32, i32* %x, i64 %idxprom9.i
-  store i32 %call8.i, i32* %arrayidx10.i, align 4, !noalias !8
-  %call11.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
+  %arrayidx10.i = getelementptr inbounds i32, ptr %x, i64 %idxprom9.i
+  store i32 %call8.i, ptr %arrayidx10.i, align 4, !noalias !8
+  %call11.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
   %idxprom12.i = sext i32 %call11.i to i64
-  %arrayidx13.i = getelementptr inbounds i32, i32* %x, i64 %idxprom12.i
-  store i32 %call11.i, i32* %arrayidx13.i, align 4, !noalias !8
-  %call14.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
-  %call15.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
-  %call16.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1) #3
+  %arrayidx13.i = getelementptr inbounds i32, ptr %x, i64 %idxprom12.i
+  store i32 %call11.i, ptr %arrayidx13.i, align 4, !noalias !8
+  %call14.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
+  %call15.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
+  %call16.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
+  call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3
   ret void
 
 worker.exit:                                      ; preds = %entry
   ret void
 }
 
-define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
+define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
-; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) {
+; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
-; CHECK-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) {
+; CHECK-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
 ; CHECK-DISABLED-NEXT:    ret void
 ;
   ret void
@@ -342,35 +342,35 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) {
   ret void
 }
 
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
 
 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
 ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; CHECK-NEXT:    ret i32 0
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; CHECK-DISABLED-NEXT:    ret i32 0
 ;
   ret i32 0
 }
 
 ; Function Attrs: convergent
-declare i32 @no_openmp(i32*) #1
+declare i32 @no_openmp(ptr) #1
 
 ; Function Attrs: convergent nounwind readonly willreturn
 declare void @pure() #2
 
-declare i8* @__kmpc_alloc_shared(i64)
-declare void @__kmpc_free_shared(i8* nocapture, i64)
-declare void @usei8ptr(i8*) #1
+declare ptr @__kmpc_alloc_shared(i64)
+declare void @__kmpc_free_shared(ptr nocapture, i64)
+declare void @usei8ptr(ptr) #1
 
 ; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
+declare i32 @__kmpc_global_thread_num(ptr) #3
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
 declare void @llvm.experimental.noalias.scope.decl(metadata) #4
@@ -388,7 +388,7 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame
 !llvm.ident = !{!7}
 
 !0 = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0}
-!1 = !{void (i32*, i64)* @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
+!1 = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
 !2 = !{i32 1, !"wchar_size", i32 4}
 !3 = !{i32 7, !"openmp", i32 50}
 !4 = !{i32 7, !"openmp-device", i32 50}
@@ -426,7 +426,7 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame
 ; CHECK-DISABLED: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" }
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{void (i32*, i64)* @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
+; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
 ; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
@@ -440,7 +440,7 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame
 ; CHECK: [[META12:![0-9]+]] = !{!"llvm.loop.mustprogress"}
 ;.
 ; CHECK-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 42, i32 16513658, !"sequential_loop", i32 6, i32 0}
-; CHECK-DISABLED: [[META1:![0-9]+]] = !{void (i32*, i64)* @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
+; CHECK-DISABLED: [[META1:![0-9]+]] = !{ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6, !"kernel", i32 1}
 ; CHECK-DISABLED: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
index 42ba49cfb70f99..c65f8d8c8179a4 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
@@ -29,45 +29,45 @@
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @__omp_offloading_2b_10393b5_spmd_l12_exec_mode = weak constant i8 1
 @__omp_offloading_2b_10393b5_generic_l20_exec_mode = weak constant i8 1
-@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
 @G = external global i32, align 4
-@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, i8* @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
+@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
 
 ;.
 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x i8*] [i8* @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, i8* @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
-; CHECK: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
+; CHECK: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
 ;.
 ; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK-DISABLE-SPMDIZATION: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x i8*] [i8* @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, i8* @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
+; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
 ; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OUTLINED___WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
 define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK:       user_code.entry:
 ; CHECK-NEXT:    call void @spmd_helper() #[[ATTR5:[0-9]+]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; CHECK-NEXT:    ret void
 ; CHECK:       worker.exit:
 ; CHECK-NEXT:    ret void
@@ -75,8 +75,8 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
 ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
 ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] {
 ; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
 ; CHECK-DISABLE-SPMDIZATION:       is_worker_check:
@@ -86,11 +86,11 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
 ; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.begin:
-; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
+; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
 ; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.finished:
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
@@ -107,26 +107,26 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_kernel_end_parallel()
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
 ; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.done.barrier:
-; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
 ; CHECK-DISABLE-SPMDIZATION:       thread.user_code.check:
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK-DISABLE-SPMDIZATION:       user_code.entry:
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @spmd_helper() #[[ATTR5:[0-9]+]]
-; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
 ; CHECK-DISABLE-SPMDIZATION:       worker.exit:
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
 ;
 entry:
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
   call void @spmd_helper() #5
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
@@ -134,26 +134,26 @@ worker.exit:                                      ; preds = %entry
 }
 
 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
 ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; CHECK-NEXT:    ret i32 0
 ;
 ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-DISABLE-SPMDIZATION-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
+; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret i32 0
 ;
   ret i32 0
 }
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8)
+declare void @__kmpc_target_deinit(ptr, i8)
 
 ; Function Attrs: convergent noinline norecurse nounwind
 define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
 ; CHECK-SAME: () #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
 ; CHECK-NEXT:    [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
 ; CHECK-NEXT:    [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
 ; CHECK-NEXT:    br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@@ -164,7 +164,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK:       user_code.entry:
 ; CHECK-NEXT:    call void @generic_helper() #[[ATTR5]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
+; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 ; CHECK-NEXT:    ret void
 ; CHECK:       worker.exit:
 ; CHECK-NEXT:    ret void
@@ -172,24 +172,24 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
 ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
 ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] {
 ; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false)
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK-DISABLE-SPMDIZATION:       user_code.entry:
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @generic_helper() #[[ATTR5]]
-; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1)
+; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
 ; CHECK-DISABLE-SPMDIZATION:       worker.exit:
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
 ;
 entry:
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true)
+  %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
   call void @generic_helper() #5
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1)
+  call void @__kmpc_target_deinit(ptr @1, i8 1)
   ret void
 
 worker.exit:                                      ; preds = %entry
@@ -201,55 +201,52 @@ define internal void @spmd_helper() #1 {
 ; CHECK-LABEL: define {{[^@]+}}@spmd_helper
 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
+; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; CHECK-NEXT:    call void @leaf() #[[ATTR5]]
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0)
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@spmd_helper
 ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] {
 ; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @leaf() #[[ATTR5]]
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR2:[0-9]+]]
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* @__omp_outlined___wrapper.ID, i8** [[TMP1]], i64 0)
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]]
+; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
 ;
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
+  %captured_vars_addrs = alloca [0 x ptr], align 8
   call void @leaf() #5
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0)
+  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
+  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
   ret void
 }
 
 ; Function Attrs: convergent noinline norecurse nounwind
-define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
+define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
-; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; CHECK-NEXT:    call void @unknown() #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__
-; CHECK-DISABLE-SPMDIZATION-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; CHECK-DISABLE-SPMDIZATION-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @unknown() #[[ATTR6:[0-9]+]]
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
 ;
 entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
+  %.global_tid..addr = alloca ptr, align 8
+  %.bound_tid..addr = alloca ptr, align 8
+  store ptr %.global_tid., ptr %.global_tid..addr, align 8
+  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
   call void @unknown() #5
   ret void
 }
@@ -262,9 +259,9 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
 ; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
+; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; CHECK-NEXT:    call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
 ; CHECK-NEXT:    ret void
 ;
 ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
@@ -273,31 +270,31 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-DISABLE-SPMDIZATION-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
+; CHECK-DISABLE-SPMDIZATION-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
 ;
 entry:
   %.addr = alloca i16, align 2
   %.addr1 = alloca i32, align 4
   %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__(i32* %.addr1, i32* %.zero.addr) #3
+  %global_args = alloca ptr, align 8
+  store i16 %0, ptr %.addr, align 2
+  store i32 %1, ptr %.addr1, align 4
+  store i32 0, ptr %.zero.addr, align 4
+  call void @__kmpc_get_shared_variables(ptr %global_args)
+  call void @__omp_outlined__(ptr %.addr1, ptr %.zero.addr) #3
   ret void
 }
 
-declare void @__kmpc_get_shared_variables(i8***)
+declare void @__kmpc_get_shared_variables(ptr)
 
 ; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
+declare i32 @__kmpc_global_thread_num(ptr) #3
 
 ; Function Attrs: alwaysinline
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) #4
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4
 
 ; Function Attrs: convergent noinline nounwind
 define internal void @leaf() #1 {
@@ -310,12 +307,12 @@ define internal void @leaf() #1 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; CHECK:       region.guarded:
-; CHECK-NEXT:    store i32 42, i32* @G, align 4
+; CHECK-NEXT:    store i32 42, ptr @G, align 4
 ; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
 ; CHECK:       region.guarded.end:
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB3]], i32 [[TMP0]])
 ; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
 ; CHECK:       region.exit:
 ; CHECK-NEXT:    ret void
@@ -323,11 +320,11 @@ define internal void @leaf() #1 {
 ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@leaf
 ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR4:[0-9]+]] {
 ; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT:    store i32 42, i32* @G, align 4
+; CHECK-DISABLE-SPMDIZATION-NEXT:    store i32 42, ptr @G, align 4
 ; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
 ;
 entry:
-  store i32 42, i32* @G, align 4
+  store i32 42, ptr @G, align 4
   ret void
 }
 
@@ -366,8 +363,8 @@ attributes #5 = { convergent }
 
 !0 = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
 !1 = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-!2 = !{void ()* @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-!3 = !{void ()* @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
+!2 = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
+!3 = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
 !4 = !{i32 1, !"wchar_size", i32 4}
 !5 = !{i32 7, !"openmp", i32 50}
 !6 = !{i32 7, !"openmp-device", i32 50}
@@ -393,8 +390,8 @@ attributes #5 = { convergent }
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
 ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK: [[META2:![0-9]+]] = !{void ()* @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK: [[META3:![0-9]+]] = !{void ()* @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
+; CHECK: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
+; CHECK: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
 ; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
@@ -404,8 +401,8 @@ attributes #5 = { convergent }
 ;.
 ; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
 ; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{void ()* @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
-; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{void ()* @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
+; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_spmd_l12, !"kernel", i32 1}
+; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{ptr @__omp_offloading_2b_10393b5_generic_l20, !"kernel", i32 1}
 ; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
index 6164519e0b2f1d..ff5ce93f66b98e 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
@@ -37,32 +37,32 @@ target triple = "nvptx64"
 ;;   }
 ;; }
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @0 = private unnamed_addr constant [103 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
-@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @2 = private unnamed_addr constant [72 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_fallback;11;1;;\00", align 1
-@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([72 x i8], [72 x i8]* @2, i32 0, i32 0) }, align 8
+@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8
 @4 = private unnamed_addr constant [104 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
-@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([104 x i8], [104 x i8]* @4, i32 0, i32 0) }, align 8
+@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8
 @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
 @6 = private unnamed_addr constant [106 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
-@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([106 x i8], [106 x i8]* @6, i32 0, i32 0) }, align 8
+@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8
 @8 = private unnamed_addr constant [75 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_no_fallback;20;1;;\00", align 1
-@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([75 x i8], [75 x i8]* @8, i32 0, i32 0) }, align 8
+@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8
 @10 = private unnamed_addr constant [107 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
-@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([107 x i8], [107 x i8]* @10, i32 0, i32 0) }, align 8
+@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8
 @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
 @12 = private unnamed_addr constant [63 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;known;4;1;;\00", align 1
-@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([63 x i8], [63 x i8]* @12, i32 0, i32 0) }, align 8
+@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8
 @G = external global i32
-@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, i8* @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
+@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
 
 ; Function Attrs: convergent norecurse nounwind
 define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
 entry:
-  %captured_vars_addrs.i.i = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18
+  %captured_vars_addrs.i.i = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18
   %exec_user_code = icmp eq i32 %0, -1, !dbg !18
   br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
 
@@ -70,20 +70,18 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void, !dbg !19
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @3) #3
+  %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @3) #3
   call void @unknown() #6, !dbg !20
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i.i, i64 0, i64 0, !dbg !23
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
+  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3
+  %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
+  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23
+  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26
   call void @unknown() #6, !dbg !27
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28
+  call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28
   br label %common.ret
 }
 
-define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
   ret i32 0
 }
 
@@ -94,23 +92,22 @@ declare void @unknown() local_unnamed_addr #1
 ; Function Attrs: nounwind
 define hidden void @known() local_unnamed_addr #2 !dbg !29 {
 entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @13)
-  %1 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0, !dbg !30
-  call void @__kmpc_parallel_51(%struct.ident_t* nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** nonnull %1, i64 0) #3, !dbg !30
+  %captured_vars_addrs = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @13)
+  call void @__kmpc_parallel_51(ptr nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3, !dbg !30
   ret void, !dbg !31
 }
 
 ; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
+declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
 
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr
+declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
 
 ; Function Attrs: norecurse nounwind
 define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
 entry:
-  %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33
+  %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
+  %0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33
   %exec_user_code = icmp eq i32 %0, -1, !dbg !33
   br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
 
@@ -118,28 +115,26 @@ common.ret:                                       ; preds = %entry, %user_code.e
   ret void, !dbg !34
 
 user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @9) #3
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i2.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i2.i, i64 0, i64 0, !dbg !35
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !35
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !39
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %5, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !40
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !42
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
+  %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @9) #3
+  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
+  %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
+  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !35
+  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !39
+  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
+  %3 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
+  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !40
+  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !42
+  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
+  %4 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
+  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43
+  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45
   call void @spmd_amenable()
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46
+  call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46
   br label %common.ret
 }
 
 ; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__2(i32* noalias nocapture nofree readnone %.global_tid., i32* noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
+define internal void @__omp_outlined__2(ptr noalias nocapture nofree readnone %.global_tid., ptr noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
 entry:
   call void @unknown() #6, !dbg !48
   ret void, !dbg !49
@@ -148,21 +143,21 @@ entry:
 ; Function Attrs: convergent norecurse nounwind
 define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 {
 entry:
-  %global_args = alloca i8**, align 8
-  call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3, !dbg !51
+  %global_args = alloca ptr, align 8
+  call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3, !dbg !51
   call void @unknown() #6, !dbg !52
   ret void, !dbg !51
 }
 
-declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
+declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr
 
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5
 
 declare void @spmd_amenable() #7
 
@@ -186,8 +181,8 @@ attributes #7 = { "llvm.assume"="ompx_spmd_amenable" }
 !2 = !{}
 !3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
 !4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
-!5 = !{void ()* @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
-!6 = !{void ()* @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
+!5 = !{ptr @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
+!6 = !{ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
 !7 = !{i32 7, !"Dwarf Version", i32 2}
 !8 = !{i32 2, !"Debug Info Version", i32 3}
 !9 = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index add432de2e34a4..7a45eb5e8be019 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -4,7 +4,7 @@
 
 target triple = "amdgcn-amd-amdhsa"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @G = internal addrspace(3) global i32 undef, align 4
 
@@ -16,84 +16,84 @@ define void @kernel() "kernel" {
 ; TUNIT: Function Attrs: norecurse
 ; TUNIT-LABEL: define {{[^@]+}}@kernel
 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
-; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
 ; TUNIT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
 ; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; TUNIT:       if.then:
-; TUNIT-NEXT:    store i32 1, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT:    store i32 1, ptr addrspace(3) @G, align 4
 ; TUNIT-NEXT:    br label [[IF_MERGE:%.*]]
 ; TUNIT:       if.else:
 ; TUNIT-NEXT:    call void @barrier() #[[ATTR4:[0-9]+]]
-; TUNIT-NEXT:    [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT:    [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
 ; TUNIT-NEXT:    call void @use1(i32 [[L]]) #[[ATTR4]]
 ; TUNIT-NEXT:    br label [[IF_MERGE]]
 ; TUNIT:       if.merge:
 ; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
 ; TUNIT:       if.then2:
-; TUNIT-NEXT:    store i32 2, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @G, align 4
 ; TUNIT-NEXT:    call void @barrier() #[[ATTR4]]
 ; TUNIT-NEXT:    br label [[IF_END]]
 ; TUNIT:       if.end:
-; TUNIT-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+; TUNIT-NEXT:    call void @__kmpc_target_deinit(ptr undef, i8 1)
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: norecurse
 ; CGSCC-LABEL: define {{[^@]+}}@kernel
 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] {
-; CGSCC-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+; CGSCC-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
 ; CGSCC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
 ; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CGSCC:       if.then:
-; CGSCC-NEXT:    store i32 1, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT:    store i32 1, ptr addrspace(3) @G, align 4
 ; CGSCC-NEXT:    br label [[IF_MERGE:%.*]]
 ; CGSCC:       if.else:
 ; CGSCC-NEXT:    call void @barrier()
-; CGSCC-NEXT:    [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
 ; CGSCC-NEXT:    call void @use1(i32 [[L]])
 ; CGSCC-NEXT:    br label [[IF_MERGE]]
 ; CGSCC:       if.merge:
 ; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
 ; CGSCC:       if.then2:
-; CGSCC-NEXT:    store i32 2, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @G, align 4
 ; CGSCC-NEXT:    call void @barrier()
 ; CGSCC-NEXT:    br label [[IF_END]]
 ; CGSCC:       if.end:
-; CGSCC-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+; CGSCC-NEXT:    call void @__kmpc_target_deinit(ptr undef, i8 1)
 ; CGSCC-NEXT:    ret void
 ;
-  %call = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+  %call = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
   %cmp = icmp eq i32 %call, -1
   br i1 %cmp, label %if.then, label %if.else
 if.then:
-  store i32 1, i32 addrspace(3)* @G
+  store i32 1, ptr addrspace(3) @G
   br label %if.merge
 if.else:
   call void @barrier();
-  %l = load i32, i32 addrspace(3)* @G
+  %l = load i32, ptr addrspace(3) @G
   call void @use1(i32 %l)
   br label %if.merge
 if.merge:
   br i1 %cmp, label %if.then2, label %if.end
 if.then2:
-  store i32 2, i32 addrspace(3)* @G
+  store i32 2, ptr addrspace(3) @G
   call void @barrier();
   br label %if.end
 if.end:
-  call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+  call void @__kmpc_target_deinit(ptr undef, i8 1)
   ret void
 }
 
 declare void @barrier() norecurse nounwind nocallback
 declare void @use1(i32) nosync norecurse nounwind nocallback
-declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) nocallback
-declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback
+declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback
+declare void @__kmpc_target_deinit(ptr, i8) nocallback
 
 !llvm.module.flags = !{!0, !1}
 !nvvm.annotations = !{!2}
 
 !0 = !{i32 7, !"openmp", i32 50}
 !1 = !{i32 7, !"openmp-device", i32 50}
-!2 = !{void ()* @kernel, !"kernel", i32 1}
+!2 = !{ptr @kernel, !"kernel", i32 1}
 
 ;.
 ; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
@@ -109,7 +109,7 @@ declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
+; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
 ;.
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}
diff --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
index 2f317fb30e864f..14e563cef6680c 100644
--- a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
+++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll
@@ -6,68 +6,61 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 @.__omp_offloading_heavyComputation.region_id = weak constant i8 0
 @.offload_maptypes. = private unnamed_addr constant [2 x i64] [i64 35, i64 35]
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 
 @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8
 
 ; CHECK-LABEL: {{[^@]+}}Successfully got offload values:
-; CHECK-NEXT: offload_baseptrs: double* %a ---   %size.addr = alloca i32, align 4 ---
-; CHECK-NEXT: offload_ptrs: double* %a ---   %size.addr = alloca i32, align 4 ---
+; CHECK-NEXT: offload_baseptrs: ptr %a ---   %size.addr = alloca i32, align 4 ---
+; CHECK-NEXT: offload_ptrs: ptr %a ---   %size.addr = alloca i32, align 4 ---
 ; CHECK-NEXT: offload_sizes:   %0 = shl nuw nsw i64 %conv, 3 --- i64 4 ---
 
-;int heavyComputation(double* a, unsigned size) {
+;int heavyComputation(ptr a, unsigned size) {
 ;  int random = rand() % 7;
 ;
 ;  //#pragma omp target data map(a[0:size], size)
-;  void* args[2];
+;  ptr args[2];
 ;  args[0] = &a;
 ;  args[1] = &size;
 ;  __tgt_target_data_begin(..., args, ...)
 ;
 ;  #pragma omp target teams
 ;  for (int i = 0; i < size; ++i) {
-;    a[i] = ++a[i] * 3.141624;
+;    a[i] = ++aptr 3.141624;
 ;  }
 ;
 ;  return random;
 ;}
-define dso_local i32 @heavyComputation(double* %a, i32 %size) {
+define dso_local i32 @heavyComputation(ptr %a, i32 %size) {
 entry:
   %size.addr = alloca i32, align 4
-  %.offload_baseptrs = alloca [2 x i8*], align 8
-  %.offload_ptrs = alloca [2 x i8*], align 8
+  %.offload_baseptrs = alloca [2 x ptr], align 8
+  %.offload_ptrs = alloca [2 x ptr], align 8
   %.offload_sizes = alloca [2 x i64], align 8
 
-  store i32 %size, i32* %size.addr, align 4
+  store i32 %size, ptr %size.addr, align 4
   %call = tail call i32 (...) @rand()
 
   %conv = zext i32 %size to i64
   %0 = shl nuw nsw i64 %conv, 3
-  %1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0
-  %2 = bitcast [2 x i8*]* %.offload_baseptrs to double**
-  store double* %a, double** %2, align 8
-  %3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0
-  %4 = bitcast [2 x i8*]* %.offload_ptrs to double**
-  store double* %a, double** %4, align 8
-  %5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0
-  store i64 %0, i64* %5, align 8
-  %6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1
-  %7 = bitcast i8** %6 to i32**
-  store i32* %size.addr, i32** %7, align 8
-  %8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1
-  %9 = bitcast i8** %8 to i32**
-  store i32* %size.addr, i32** %9, align 8
-  %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1
-  store i64 4, i64* %10, align 8
-  call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes., i64 0, i64 0), i8** null, i8** null)
+  store ptr %a, ptr %.offload_baseptrs, align 8
+  store ptr %a, ptr %.offload_ptrs, align 8
+  store i64 %0, ptr %.offload_sizes, align 8
+  %1 = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i64 0, i64 1
+  store ptr %size.addr, ptr %1, align 8
+  %2 = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i64 0, i64 1
+  store ptr %size.addr, ptr %2, align 8
+  %3 = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i64 0, i64 1
+  store i64 4, ptr %3, align 8
+  call void @__tgt_target_data_begin_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes., ptr null, ptr null)
   %rem = srem i32 %call, 7
-  call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes., i64 0, i64 0), i8** null, i8** null)
+  call void @__tgt_target_data_end_mapper(ptr @0, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr @.offload_maptypes., ptr null, ptr null)
   ret i32 %rem
 }
 
-declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
-declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
+declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
+declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr)
 
 declare dso_local i32 @rand(...)