Skip to content

Commit

Permalink
PR #11643: Fix unexpected recursive call in ksl for loop
Browse files Browse the repository at this point in the history
Imported from GitHub PR openxla/xla#11643

ksl does not have the overload ForWithStatus(string_view, llvm::Value*, llvm::Value*, llvm::Value, bool, const std::function<Status(llvm::Value*, bool)>&); it only has ForWithStatus(string_view, llvm::Value*, llvm::Value*, llvm::Value, const std::function<Status(llvm::Value*, bool)>&), which is the function that was expected to be called. So when the first iteration is peeled, the call instead resolves to ForWithStatus(string_view, llvm::Value*, llvm::Value*, llvm::Value, bool, const std::function<Status(llvm::Value*, llvm::Value*)>&) and recurses into itself (std::function<Status(llvm::Value*, bool)> is implicitly converted to std::function<Status(llvm::Value*, llvm::Value*)> in my running environment), which leads to a stack smash.
Copybara import of the project:

--
daa71e5687726f30cc8d0da6419e576d4ddedff8 by Zhou, Lingzhi <lingzhi.zhou@intel.com>:

fix unexpected recursive call

--
d67a61ea83d8204c034f6eadb98bebc7fecbcd27 by Zhou, Lingzhi <lingzhi.zhou@intel.com>:

remove deadcode

--
7e1c236da49cf4012cac372db5c7760876563461 by Zhou, Lingzhi <lingzhi.zhou@intel.com>:

fix build error

--
e5ac16d665d88b8b8d1258b1e0e4ebfd94813547 by Zhou, Lingzhi <lingzhi.zhou@intel.com>:

fix filecheck error

Merging this change closes #11643

PiperOrigin-RevId: 627980814
  • Loading branch information
lingzhi98 authored and tensorflower-gardener committed Apr 25, 2024
1 parent 702dbc0 commit 3ad6844
Show file tree
Hide file tree
Showing 13 changed files with 25 additions and 129 deletions.
5 changes: 3 additions & 2 deletions third_party/xla/xla/service/cpu/tiled_dot_emitter.cc
Expand Up @@ -360,8 +360,9 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue(
ksl_.For(
"dot.inner.epilg.outer", /*start=*/current_tile_col,
/*end=*/b_->CreateAdd(columns_llvm, current_tile_col),
/*step=*/1, /*peel_first_iteration=*/false,
[&](llvm::Value* col, llvm::Value* is_first_scalar_col) {
/*step=*/1, [&](llvm::Value* col) {
llvm::Value* is_first_scalar_col =
b_->CreateICmpEQ(col, current_tile_col);
llvm::Value* rhs_element = vsl_.LoadScalar(rhs_, col);
llvm::Value* total_offset = b_->CreateMul(col, b_->getInt64(m()));
llvm::Value* lhs_base_pointer =
Expand Down
2 changes: 0 additions & 2 deletions third_party/xla/xla/service/gpu/tests/reduce_atomic_min.hlo
Expand Up @@ -149,7 +149,6 @@ ENTRY reduce.1 {
// CHECK: loop2.loop_body: ; preds = %[[VAL_107]]
// CHECK: %[[VAL_111:.*]] = add nuw nsw i32 %[[VAL_109]], 1
// CHECK: store i32 %[[VAL_111]], ptr %[[VAL_43]], align 4
// CHECK: %[[VAL_112:.*]] = icmp eq i32 %[[VAL_109]], 0
// CHECK: %[[OFFSET_2:.*]] = add i32 %loop2.indvar, %thread.id.2
// CHECK: store i32 0, ptr %loop3.invar_address, align 4
// CHECK: br label %loop3.loop_header
Expand Down Expand Up @@ -189,7 +188,6 @@ ENTRY reduce.1 {
// CHECK: loop2.loop_body5:
// CHECK: %[[VAL_139:.*]] = add nuw nsw i32 %[[VAL_136]], 1
// CHECK: store i32 %[[VAL_139]], ptr %[[LOOP2_I_2]], align 4
// CHECK: %[[VAL_140:.*]] = icmp eq i32 %[[VAL_136]], 0
// CHECK: %[[VAL_141:.*]] = add i32 %[[VAL_136]], %thread.id.2
// CHECK: %[[VAL_144:.*]] = icmp ult i32 %[[VAL_141]], %tile_bound.2
// CHECK: br i1 %[[VAL_144]], label %x_in_tile-true, label %x_in_tile-after
Expand Down
Expand Up @@ -71,7 +71,6 @@ ENTRY kernel_entry {
// CHECK: loop1.loop_body: ; preds = %[[VAL_29]]
// CHECK: %[[VAL_35:.*]] = add nuw nsw i32 %[[VAL_31]], 32
// CHECK: store i32 %[[VAL_35]], ptr %[[VAL_12]], align 4
// CHECK: %[[VAL_36:.*]] = icmp eq i32 %[[VAL_31]], %thread.id.1
// CHECK: store i32 0, ptr %[[VAL_11]], align 4
// CHECK: br label %[[VAL_37:.*]]
// CHECK: loop2.loop_header: ; preds = %[[VAL_38:.*]], %[[VAL_34]]
Expand All @@ -81,7 +80,6 @@ ENTRY kernel_entry {
// CHECK: loop2.loop_body: ; preds = %[[VAL_37]]
// CHECK: %[[VAL_42:.*]] = add nuw nsw i32 %[[VAL_39]], 32
// CHECK: store i32 %[[VAL_42]], ptr %[[VAL_11]], align 4
// CHECK: %[[VAL_43:.*]] = icmp eq i32 %[[VAL_39]], 0
// CHECK: %[[VAL_44:.*]] = add i32 %[[VAL_39]], %thread.id.2
// CHECK: %[[VAL_45:.*]] = icmp ult i32 %[[VAL_44]], 32
// CHECK: br i1 %[[VAL_45]], label %[[VAL_46:.*]], label %[[VAL_38]]
Expand Down
2 changes: 0 additions & 2 deletions third_party/xla/xla/service/gpu/tests/reduce_f64_column.hlo
Expand Up @@ -78,7 +78,6 @@ ENTRY e {
// CHECK: loop1.loop_body: ; preds = %[[VAL_29]]
// CHECK: %[[VAL_35:.*]] = add nuw nsw i32 %[[VAL_31]], 32
// CHECK: store i32 %[[VAL_35]], ptr{{.*}}%[[VAL_12]], align 4
// CHECK: %[[VAL_36:.*]] = icmp eq i32 %[[VAL_31]], %thread.id.1
// CHECK: store i32 0, ptr{{.*}}%[[VAL_11]], align 4
// CHECK: br label %[[VAL_37:.*]]
// CHECK: loop2.loop_header: ; preds = %[[VAL_38:.*]], %[[VAL_34]]
Expand All @@ -88,7 +87,6 @@ ENTRY e {
// CHECK: loop2.loop_body: ; preds = %[[VAL_37]]
// CHECK: %[[VAL_42:.*]] = add nuw nsw i32 %[[VAL_39]], 32
// CHECK: store i32 %[[VAL_42]], ptr{{.*}}%[[VAL_11]], align 4
// CHECK: %[[VAL_43:.*]] = icmp eq i32 %[[VAL_39]], 0
// CHECK: %[[VAL_44:.*]] = add i32 %[[VAL_39]], %thread.id.2
// CHECK: %[[VAL_45:.*]] = icmp ult i32 %[[VAL_44]], 32
// CHECK: br i1 %[[VAL_45]], label %[[VAL_46:.*]], label %[[VAL_38]]
Expand Down
Expand Up @@ -190,7 +190,6 @@ ENTRY reduce.1 {
// CHECK: loop2.loop_body: ; preds = %[[VAL_133]]
// CHECK: %[[VAL_138:.*]] = add nuw nsw i32 %[[VAL_135]], 640
// CHECK: store i32 %[[VAL_138]], ptr %[[VAL_27]], align 4
// CHECK: %[[VAL_139:.*]] = icmp eq i32 %[[VAL_135]], 0
// CHECK: %[[VAL_140:.*]] = add i32 %[[VAL_135]], %thread.id.2
// CHECK: store i32 0, ptr %[[VAL_26]], align 4
// CHECK: br label %[[VAL_141:.*]]
Expand All @@ -201,7 +200,6 @@ ENTRY reduce.1 {
// CHECK: loop3.loop_body: ; preds = %[[VAL_141]]
// CHECK: %[[VAL_145:.*]] = add nuw nsw i32 %[[VAL_143]], 1
// CHECK: store i32 %[[VAL_145]], ptr %[[VAL_26]], align 4
// CHECK: %[[VAL_146:.*]] = icmp eq i32 %[[VAL_143]], 0
// CHECK: %[[VAL_147:.*]] = add i32 %tile_origin.0, 0
// CHECK: %[[VAL_148:.*]] = add i32 %tile_origin.1, 0
// CHECK: %[[VAL_149:.*]] = add i32 %tile_origin.2, %[[VAL_140]]
Expand Down Expand Up @@ -236,7 +234,6 @@ ENTRY reduce.1 {
// CHECK: loop2.loop_body5: ; preds = %[[VAL_164]]
// CHECK: %[[VAL_169:.*]] = add nuw nsw i32 %[[VAL_166]], 640
// CHECK: store i32 %[[VAL_169]], ptr %[[VAL_24]], align 4
// CHECK: %[[VAL_170:.*]] = icmp eq i32 %[[VAL_166]], 0
// CHECK: %[[VAL_171:.*]] = add i32 %[[VAL_166]], %thread.id.2
// CHECK: %[[VAL_172:.*]] = icmp ult i32 %[[VAL_171]], %tile_bound.2
// CHECK: br i1 %[[VAL_172]], label %[[VAL_173:.*]], label %[[VAL_165]]
Expand All @@ -254,7 +251,6 @@ ENTRY reduce.1 {
// CHECK: loop3.loop_body11: ; preds = %[[VAL_175]]
// CHECK: %[[VAL_179:.*]] = add nuw nsw i32 %[[VAL_177]], 1
// CHECK: store i32 %[[VAL_179]], ptr %[[VAL_23]], align 4
// CHECK: %[[VAL_180:.*]] = icmp eq i32 %[[VAL_177]], 0
// CHECK: %[[VAL_181:.*]] = add i32 %tile_origin.0, 0
// CHECK: %[[VAL_182:.*]] = add i32 %tile_origin.1, 0
// CHECK: %[[VAL_183:.*]] = add i32 %tile_origin.2, %[[VAL_171]]
Expand Down
Expand Up @@ -89,7 +89,6 @@ ENTRY reduce.1 {
// CHECK: loop1.loop_body: ; preds = %[[VAL_44]]
// CHECK: %[[VAL_50:.*]] = add nuw nsw i32 %[[VAL_46]], 4
// CHECK: store i32 %[[VAL_50]], ptr %[[VAL_27]], align 4
// CHECK: %[[VAL_51:.*]] = icmp eq i32 %[[VAL_46]], %thread.id.1
// CHECK: br i1 true, label %[[VAL_52:.*]], label %[[VAL_53:.*]]
// CHECK: is_full_tile-after: ; preds = %[[VAL_54:.*]], %[[VAL_55:.*]]
// CHECK: br label %[[VAL_44]], !llvm.loop !5
Expand Down Expand Up @@ -139,7 +138,6 @@ ENTRY reduce.1 {
// CHECK: loop2.loop_body: ; preds = %[[VAL_74]]
// CHECK: %[[VAL_79:.*]] = add nuw nsw i32 %[[VAL_76]], 64
// CHECK: store i32 %[[VAL_79]], ptr %[[VAL_26]], align 4
// CHECK: %[[VAL_80:.*]] = icmp eq i32 %[[VAL_76]], 0
// CHECK: %[[VAL_81:.*]] = add i32 %[[VAL_76]], %thread.id.2
// CHECK: store i32 0, ptr %[[VAL_25]], align 4
// CHECK: br label %[[VAL_82:.*]]
Expand All @@ -150,7 +148,6 @@ ENTRY reduce.1 {
// CHECK: loop3.loop_body: ; preds = %[[VAL_82]]
// CHECK: %[[VAL_86:.*]] = add nuw nsw i32 %[[VAL_84]], 1
// CHECK: store i32 %[[VAL_86]], ptr %[[VAL_25]], align 4
// CHECK: %[[VAL_87:.*]] = icmp eq i32 %[[VAL_84]], 0
// CHECK: %[[VAL_88:.*]] = add i32 %tile_origin.0, 0
// CHECK: %[[VAL_89:.*]] = add i32 %tile_origin.1, %[[VAL_46]]
// CHECK: %[[VAL_90:.*]] = add i32 %tile_origin.2, %[[VAL_81]]
Expand Down Expand Up @@ -186,7 +183,6 @@ ENTRY reduce.1 {
// CHECK: loop2.loop_body6: ; preds = %[[VAL_106]]
// CHECK: %[[VAL_111:.*]] = add nuw nsw i32 %[[VAL_108]], 64
// CHECK: store i32 %[[VAL_111]], ptr %[[VAL_23]], align 4
// CHECK: %[[VAL_112:.*]] = icmp eq i32 %[[VAL_108]], 0
// CHECK: %[[VAL_113:.*]] = add i32 %[[VAL_108]], %thread.id.2
// CHECK: %[[VAL_114:.*]] = icmp ult i32 %[[VAL_113]], 512
// CHECK: br i1 %[[VAL_114]], label %[[VAL_115:.*]], label %[[VAL_107]]
Expand All @@ -204,7 +200,6 @@ ENTRY reduce.1 {
// CHECK: loop3.loop_body12: ; preds = %[[VAL_117]]
// CHECK: %[[VAL_121:.*]] = add nuw nsw i32 %[[VAL_119]], 1
// CHECK: store i32 %[[VAL_121]], ptr %[[VAL_22]], align 4
// CHECK: %[[VAL_122:.*]] = icmp eq i32 %[[VAL_119]], 0
// CHECK: %[[VAL_123:.*]] = add i32 %tile_origin.0, 0
// CHECK: %[[VAL_124:.*]] = add i32 %tile_origin.1, %[[VAL_46]]
// CHECK: %[[VAL_125:.*]] = add i32 %tile_origin.2, %[[VAL_113]]
Expand Down
Expand Up @@ -106,7 +106,6 @@ ENTRY main {
// CHECK: loop0.loop_body: ; preds = %[[VAL_56]]
// CHECK: %[[VAL_62:.*]] = add nuw nsw i32 %[[VAL_58]], 1
// CHECK: store i32 %[[VAL_62]], ptr{{.*}}%[[VAL_36]], align 4
// CHECK: %[[VAL_63:.*]] = icmp eq i32 %[[VAL_58]], 0
// CHECK: store i32 %thread.id.1, ptr{{.*}}%[[VAL_35]], align 4
// CHECK: br label %[[VAL_64:.*]]
// CHECK: loop1.loop_header: ; preds = %[[VAL_65:.*]], %[[VAL_61]]
Expand All @@ -116,7 +115,6 @@ ENTRY main {
// CHECK: loop1.loop_body: ; preds = %[[VAL_64]]
// CHECK: %[[VAL_69:.*]] = add nuw nsw i32 %[[VAL_66]], 8
// CHECK: store i32 %[[VAL_69]], ptr{{.*}}%[[VAL_35]], align 4
// CHECK: %[[VAL_70:.*]] = icmp eq i32 %[[VAL_66]], %thread.id.1
// CHECK: %[[VAL_71:.*]] = icmp eq i32 512, %tile_bound.2
// CHECK: br i1 %[[VAL_71]], label %[[VAL_72:.*]], label %[[VAL_73:.*]]
// CHECK: is_full_tile-after: ; preds = %[[VAL_74:.*]], %[[VAL_75:.*]]
Expand Down Expand Up @@ -294,7 +292,6 @@ ENTRY main {
// CHECK: loop2.loop_body: ; preds = %[[VAL_119]]
// CHECK: %[[VAL_123:.*]] = add nuw nsw i32 %[[VAL_121]], 32
// CHECK: store i32 %[[VAL_123]], ptr{{.*}}%[[VAL_34]], align 4
// CHECK: %[[VAL_124:.*]] = icmp eq i32 %[[VAL_121]], 0
// CHECK: %[[VAL_125:.*]] = add i32 %[[VAL_121]], %thread.id.2
// CHECK: %[[VAL_126:.*]] = add i32 %tile_origin.0, %[[VAL_58]]
// CHECK: %[[VAL_127:.*]] = add i32 %tile_origin.1, %[[VAL_66]]
Expand Down Expand Up @@ -337,7 +334,6 @@ ENTRY main {
// CHECK: loop2.loop_body10: ; preds = %[[VAL_139]]
// CHECK: %[[VAL_144:.*]] = add nuw nsw i32 %[[VAL_141]], 32
// CHECK: store i32 %[[VAL_144]], ptr{{.*}}%[[VAL_30]], align 4
// CHECK: %[[VAL_145:.*]] = icmp eq i32 %[[VAL_141]], 0
// CHECK: %[[VAL_146:.*]] = add i32 %[[VAL_141]], %thread.id.2
// CHECK: %[[VAL_147:.*]] = icmp ult i32 %[[VAL_146]], %tile_bound.2
// CHECK: br i1 %[[VAL_147]], label %[[VAL_148:.*]], label %[[VAL_140]]
Expand Down
4 changes: 0 additions & 4 deletions third_party/xla/xla/service/gpu/tests/transpose_021.hlo
Expand Up @@ -47,7 +47,6 @@ ENTRY main {
// CHECK: loop1.loop_body: ; preds = %[[VAL_12]]
// CHECK: %[[VAL_19:.*]] = add nuw nsw i32 %[[VAL_15]], 4
// CHECK: store i32 %[[VAL_19]], ptr{{.*}} %[[VAL_3]], align 4
// CHECK: %[[VAL_20:.*]] = icmp eq i32 %[[VAL_15]], %thread.id.1
// CHECK: store i32 %thread.id.2, ptr{{.*}} %[[VAL_2]], align 4
// CHECK: br label %[[VAL_21:.*]]
// CHECK: loop2.loop_header: ; preds = %[[VAL_22:.*]], %[[VAL_18]]
Expand All @@ -57,7 +56,6 @@ ENTRY main {
// CHECK: loop2.loop_body: ; preds = %[[VAL_21]]
// CHECK: %[[VAL_25:.*]] = add nuw nsw i32 %[[VAL_23]], 32
// CHECK: store i32 %[[VAL_25]], ptr{{.*}} %[[VAL_2]], align 4
// CHECK: %[[VAL_26:.*]] = icmp eq i32 %[[VAL_23]], %thread.id.2
// CHECK: %[[VAL_27:.*]] = add i32 %tile_origin.0, 0
// CHECK: %[[VAL_28:.*]] = add i32 %tile_origin.1, %[[VAL_15]]
// CHECK: %[[VAL_29:.*]] = add i32 %tile_origin.2, %[[VAL_23]]
Expand All @@ -81,7 +79,6 @@ ENTRY main {
// CHECK: loop1.loop_body5: ; preds = %[[VAL_35]]
// CHECK: %[[VAL_41:.*]] = add nuw nsw i32 %[[VAL_37]], 4
// CHECK: store i32 %[[VAL_41]], ptr{{.*}} %[[VAL_1]], align 4
// CHECK: %[[VAL_42:.*]] = icmp eq i32 %[[VAL_37]], %thread.id.1
// CHECK: store i32 %thread.id.2, ptr{{.*}} %[[VAL_0]], align 4
// CHECK: br label %[[VAL_43:.*]]
// CHECK: loop2.loop_header10: ; preds = %[[VAL_44:.*]], %[[VAL_40]]
Expand All @@ -91,7 +88,6 @@ ENTRY main {
// CHECK: loop2.loop_body11: ; preds = %[[VAL_43]]
// CHECK: %[[VAL_47:.*]] = add nuw nsw i32 %[[VAL_45]], 32
// CHECK: store i32 %[[VAL_47]], ptr{{.*}} %[[VAL_0]], align 4
// CHECK: %[[VAL_48:.*]] = icmp eq i32 %[[VAL_45]], %thread.id.2
// CHECK: %[[VAL_49:.*]] = add i32 %tile_origin.0, 0
// CHECK: %[[VAL_50:.*]] = add i32 %tile_origin.2, %[[VAL_37]]
// CHECK: %[[VAL_51:.*]] = add i32 %tile_origin.1, %[[VAL_45]]
Expand Down
Expand Up @@ -50,7 +50,6 @@ ENTRY main {
// CHECK: loop1.loop_body: ; preds = %[[VAL_12]]
// CHECK: %[[VAL_19:.*]] = add nuw nsw i32 %[[VAL_15]], 4
// CHECK: store i32 %[[VAL_19]], ptr{{.*}} %[[VAL_3]], align 4
// CHECK: %[[VAL_20:.*]] = icmp eq i32 %[[VAL_15]], %thread.id.1
// CHECK: store i32 %thread.id.2, ptr{{.*}} %[[VAL_2]], align 4
// CHECK: br label %[[VAL_21:.*]]
// CHECK: loop2.loop_header: ; preds = %[[VAL_22:.*]], %[[VAL_18]]
Expand All @@ -60,7 +59,6 @@ ENTRY main {
// CHECK: loop2.loop_body: ; preds = %[[VAL_21]]
// CHECK: %[[VAL_25:.*]] = add nuw nsw i32 %[[VAL_23]], 32
// CHECK: store i32 %[[VAL_25]], ptr{{.*}} %[[VAL_2]], align 4
// CHECK: %[[VAL_26:.*]] = icmp eq i32 %[[VAL_23]], %thread.id.2
// CHECK: %[[VAL_27:.*]] = add i32 %tile_origin.0, 0
// CHECK: %[[VAL_28:.*]] = add i32 %tile_origin.1, %[[VAL_15]]
// CHECK: %[[VAL_29:.*]] = add i32 %tile_origin.2, %[[VAL_23]]
Expand Down Expand Up @@ -89,7 +87,6 @@ ENTRY main {
// CHECK: loop1.loop_body7: ; preds = %[[VAL_40]]
// CHECK: %[[VAL_46:.*]] = add nuw nsw i32 %[[VAL_42]], 4
// CHECK: store i32 %[[VAL_46]], ptr{{.*}} %[[VAL_1]], align 4
// CHECK: %[[VAL_47:.*]] = icmp eq i32 %[[VAL_42]], %thread.id.1
// CHECK: store i32 %thread.id.2, ptr{{.*}} %[[VAL_0]], align 4
// CHECK: br label %[[VAL_48:.*]]
// CHECK: loop2.loop_header12: ; preds = %[[VAL_49:.*]], %[[VAL_45]]
Expand All @@ -99,7 +96,6 @@ ENTRY main {
// CHECK: loop2.loop_body13: ; preds = %[[VAL_48]]
// CHECK: %[[VAL_52:.*]] = add nuw nsw i32 %[[VAL_50]], 32
// CHECK: store i32 %[[VAL_52]], ptr{{.*}} %[[VAL_0]], align 4
// CHECK: %[[VAL_53:.*]] = icmp eq i32 %[[VAL_50]], %thread.id.2
// CHECK: %[[VAL_54:.*]] = add i32 %tile_origin.0, 0
// CHECK: %[[VAL_55:.*]] = add i32 %tile_origin.2, %[[VAL_42]]
// CHECK: %[[VAL_56:.*]] = add i32 %tile_origin.1, %[[VAL_50]]
Expand Down
4 changes: 0 additions & 4 deletions third_party/xla/xla/service/gpu/tests/transpose_210.hlo
Expand Up @@ -46,7 +46,6 @@ ENTRY main {
// CHECK: loop0.loop_body: ; preds = %[[VAL_12]]
// CHECK: %[[VAL_19:.*]] = add nuw nsw i32 %[[VAL_15]], 4
// CHECK: store i32 %[[VAL_19]], ptr{{.*}} %[[VAL_3]], align 4
// CHECK: %[[VAL_20:.*]] = icmp eq i32 %[[VAL_15]], %thread.id.0
// CHECK: store i32 %thread.id.2, ptr{{.*}} %[[VAL_2]], align 4
// CHECK: br label %[[VAL_21:.*]]
// CHECK: loop2.loop_header: ; preds = %[[VAL_22:.*]], %[[VAL_18]]
Expand All @@ -56,7 +55,6 @@ ENTRY main {
// CHECK: loop2.loop_body: ; preds = %[[VAL_21]]
// CHECK: %[[VAL_25:.*]] = add nuw nsw i32 %[[VAL_23]], 32
// CHECK: store i32 %[[VAL_25]], ptr{{.*}} %[[VAL_2]], align 4
// CHECK: %[[VAL_26:.*]] = icmp eq i32 %[[VAL_23]], %thread.id.2
// CHECK: %[[VAL_27:.*]] = add i32 %tile_origin.0, %[[VAL_15]]
// CHECK: %[[VAL_28:.*]] = add i32 %tile_origin.1, 0
// CHECK: %[[VAL_29:.*]] = add i32 %tile_origin.2, %[[VAL_23]]
Expand All @@ -80,7 +78,6 @@ ENTRY main {
// CHECK: loop0.loop_body5: ; preds = %[[VAL_35]]
// CHECK: %[[VAL_41:.*]] = add nuw nsw i32 %[[VAL_37]], 4
// CHECK: store i32 %[[VAL_41]], ptr{{.*}} %[[VAL_1]], align 4
// CHECK: %[[VAL_42:.*]] = icmp eq i32 %[[VAL_37]], %thread.id.0
// CHECK: store i32 %thread.id.2, ptr{{.*}} %[[VAL_0]], align 4
// CHECK: br label %[[VAL_43:.*]]
// CHECK: loop2.loop_header10: ; preds = %[[VAL_44:.*]], %[[VAL_40]]
Expand All @@ -90,7 +87,6 @@ ENTRY main {
// CHECK: loop2.loop_body11: ; preds = %[[VAL_43]]
// CHECK: %[[VAL_47:.*]] = add nuw nsw i32 %[[VAL_45]], 32
// CHECK: store i32 %[[VAL_47]], ptr{{.*}} %[[VAL_0]], align 4
// CHECK: %[[VAL_48:.*]] = icmp eq i32 %[[VAL_45]], %thread.id.2
// CHECK: %[[VAL_49:.*]] = add i32 %tile_origin.2, %[[VAL_37]]
// CHECK: %[[VAL_50:.*]] = add i32 %tile_origin.1, 0
// CHECK: %[[VAL_51:.*]] = add i32 %tile_origin.0, %[[VAL_45]]
Expand Down
Expand Up @@ -48,7 +48,6 @@ ENTRY main {
// CHECK: loop0.loop_body: ; preds = %[[VAL_12]]
// CHECK: %[[VAL_19:.*]] = add nuw nsw i32 %[[VAL_15]], 4
// CHECK: store i32 %[[VAL_19]], ptr{{.*}} %[[VAL_3]], align 4
// CHECK: %[[VAL_20:.*]] = icmp eq i32 %[[VAL_15]], %thread.id.0
// CHECK: store i32 %thread.id.2, ptr{{.*}} %[[VAL_2]], align 4
// CHECK: br label %[[VAL_21:.*]]
// CHECK: loop2.loop_header: ; preds = %[[VAL_22:.*]], %[[VAL_18]]
Expand All @@ -58,7 +57,6 @@ ENTRY main {
// CHECK: loop2.loop_body: ; preds = %[[VAL_21]]
// CHECK: %[[VAL_25:.*]] = add nuw nsw i32 %[[VAL_23]], 32
// CHECK: store i32 %[[VAL_25]], ptr{{.*}} %[[VAL_2]], align 4
// CHECK: %[[VAL_26:.*]] = icmp eq i32 %[[VAL_23]], %thread.id.2
// CHECK: %[[VAL_27:.*]] = add i32 %tile_origin.0, %[[VAL_15]]
// CHECK: %[[VAL_28:.*]] = add i32 %tile_origin.1, 0
// CHECK: %[[VAL_29:.*]] = add i32 %tile_origin.2, %[[VAL_23]]
Expand Down Expand Up @@ -87,7 +85,6 @@ ENTRY main {
// CHECK: loop0.loop_body7: ; preds = %[[VAL_40]]
// CHECK: %[[VAL_46:.*]] = add nuw nsw i32 %[[VAL_42]], 4
// CHECK: store i32 %[[VAL_46]], ptr{{.*}} %[[VAL_1]], align 4
// CHECK: %[[VAL_47:.*]] = icmp eq i32 %[[VAL_42]], %thread.id.0
// CHECK: store i32 %thread.id.2, ptr{{.*}} %[[VAL_0]], align 4
// CHECK: br label %[[VAL_48:.*]]
// CHECK: loop2.loop_header12: ; preds = %[[VAL_49:.*]], %[[VAL_45]]
Expand All @@ -97,7 +94,6 @@ ENTRY main {
// CHECK: loop2.loop_body13: ; preds = %[[VAL_48]]
// CHECK: %[[VAL_52:.*]] = add nuw nsw i32 %[[VAL_50]], 32
// CHECK: store i32 %[[VAL_52]], ptr{{.*}} %[[VAL_0]], align 4
// CHECK: %[[VAL_53:.*]] = icmp eq i32 %[[VAL_50]], %thread.id.2
// CHECK: %[[VAL_54:.*]] = add i32 %tile_origin.2, %[[VAL_42]]
// CHECK: %[[VAL_55:.*]] = add i32 %tile_origin.1, 0
// CHECK: %[[VAL_56:.*]] = add i32 %tile_origin.0, %[[VAL_50]]
Expand Down

0 comments on commit 3ad6844

Please sign in to comment.