Expand Up
@@ -309,16 +309,19 @@ int bar(int n){
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[A_CASTED2:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AA_CASTED3:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[A_CASTED10:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [9 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [9 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [9 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK1-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
Expand Down
Expand Up
@@ -352,7 +355,6 @@ int bar(int n){
// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 2, ptr [[TMP19]], align 4
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
Expand Down
Expand Up
@@ -410,7 +412,6 @@ int bar(int n){
// CHECK1-NEXT: store ptr null, ptr [[TMP44]], align 8
// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK1-NEXT: store i32 2, ptr [[TMP47]], align 4
// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
Expand Down
Expand Up
@@ -521,7 +522,6 @@ int bar(int n){
// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
// CHECK1-NEXT: store i32 2, ptr [[TMP100]], align 4
// CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
Expand Down
Expand Up
@@ -911,6 +911,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
Expand Down
Expand Up
@@ -970,7 +971,6 @@ int bar(int n){
// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 2, ptr [[TMP29]], align 4
// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
Expand Down
Expand Up
@@ -1035,6 +1035,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
// CHECK1-NEXT: store i16 0, ptr [[AA]], align 2
Expand Down
Expand Up
@@ -1078,7 +1079,6 @@ int bar(int n){
// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8
// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 2, ptr [[TMP21]], align 4
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
Expand Down
Expand Up
@@ -1133,6 +1133,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
// CHECK1-NEXT: store i16 0, ptr [[AA]], align 2
Expand Down
Expand Up
@@ -1166,7 +1167,6 @@ int bar(int n){
// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT: store i32 2, ptr [[TMP16]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
Expand Down
Expand Up
@@ -1412,16 +1412,19 @@ int bar(int n){
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: [[A_CASTED2:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AA_CASTED3:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [2 x ptr], align 4
// CHECK3-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-NEXT: [[A_CASTED10:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [9 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [9 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [9 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK3-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[A]], align 4
Expand Down
Expand Up
@@ -1453,7 +1456,6 @@ int bar(int n){
// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 2, ptr [[TMP17]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
Expand Down
Expand Up
@@ -1511,7 +1513,6 @@ int bar(int n){
// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4
// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK3-NEXT: store i32 2, ptr [[TMP45]], align 4
// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
Expand Down
Expand Up
@@ -1624,7 +1625,6 @@ int bar(int n){
// CHECK3-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
// CHECK3-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
// CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
// CHECK3-NEXT: store i32 2, ptr [[TMP100]], align 4
// CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
Expand Down
Expand Up
@@ -2014,6 +2014,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
Expand Down
Expand Up
@@ -2073,7 +2074,6 @@ int bar(int n){
// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 2, ptr [[TMP29]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
Expand Down
Expand Up
@@ -2138,6 +2138,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[A]], align 4
// CHECK3-NEXT: store i16 0, ptr [[AA]], align 2
Expand Down
Expand Up
@@ -2181,7 +2182,6 @@ int bar(int n){
// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4
// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 2, ptr [[TMP21]], align 4
// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
Expand Down
Expand Up
@@ -2236,6 +2236,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[A]], align 4
// CHECK3-NEXT: store i16 0, ptr [[AA]], align 2
Expand Down
Expand Up
@@ -2269,7 +2270,6 @@ int bar(int n){
// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4
// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT: store i32 2, ptr [[TMP16]], align 4
// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
Expand Down