Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[X86][AMX] Pre-checkin the test case for AMX undef and zero
- Loading branch information
Showing 1 changed file with 210 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s | ||
|
||
; foo_undef: the <256 x i32> phi in %l2 has an 'undef' incoming value
; from %entry. The autogenerated CHECK lines show -lower-amx-type
; rewriting the cast intrinsics into tilestored64/tileloadd64 through
; stack allocas while the phi stays a plain <256 x i32> value.
define void @foo_undef(i8 *%buf) {
; CHECK-LABEL: @foo_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca <256 x i32>, align 64
; CHECK-NEXT: [[TMP1:%.*]] = alloca <256 x i32>, align 64
; CHECK-NEXT: br i1 undef, label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[T1:%.*]] = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 32)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i32>* [[TMP1]] to i8*
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 32, i8* [[TMP2]], i64 32, x86_amx [[T1]])
; CHECK-NEXT: [[TMP3:%.*]] = load <256 x i32>, <256 x i32>* [[TMP1]], align 1024
; CHECK-NEXT: br i1 undef, label [[L2]], label [[EXIT:%.*]]
; CHECK: l2:
; CHECK-NEXT: [[T3:%.*]] = phi <256 x i32> [ undef, [[ENTRY:%.*]] ], [ [[TMP3]], [[L1]] ]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <256 x i32>* [[TMP0]] to i8*
; CHECK-NEXT: store <256 x i32> [[T3]], <256 x i32>* [[TMP0]], align 1024
; CHECK-NEXT: [[TMP5:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 32, i8* [[TMP4]], i64 32)
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 32, i8* [[BUF:%.*]], i64 1024, x86_amx [[TMP5]])
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br i1 undef, label %l1, label %l2

l1:
  %t1 = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 32)
  %t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
  br i1 undef, label %l2, label %exit

l2:
  ; merges an undef vector with the tile produced in %l1
  %t3 = phi <256 x i32> [ undef, %entry ], [ %t2, %l1 ]
  %t4 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %t3)
  call void @llvm.x86.tilestored64.internal(i16 8, i16 32, i8* %buf, i64 1024, x86_amx %t4)
  br label %exit

exit:
  ret void
}
|
||
; foo_zero: same CFG as @foo_undef but the phi's incoming value from
; %entry is 'zeroinitializer' instead of 'undef'. CHECK lines show the
; zeroinitializer is preserved through the alloca-based lowering.
define void @foo_zero(i8 *%buf) {
; CHECK-LABEL: @foo_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca <256 x i32>, align 64
; CHECK-NEXT: [[TMP1:%.*]] = alloca <256 x i32>, align 64
; CHECK-NEXT: br i1 undef, label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[T1:%.*]] = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 32)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i32>* [[TMP1]] to i8*
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 32, i8* [[TMP2]], i64 32, x86_amx [[T1]])
; CHECK-NEXT: [[TMP3:%.*]] = load <256 x i32>, <256 x i32>* [[TMP1]], align 1024
; CHECK-NEXT: br i1 undef, label [[L2]], label [[EXIT:%.*]]
; CHECK: l2:
; CHECK-NEXT: [[T3:%.*]] = phi <256 x i32> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP3]], [[L1]] ]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <256 x i32>* [[TMP0]] to i8*
; CHECK-NEXT: store <256 x i32> [[T3]], <256 x i32>* [[TMP0]], align 1024
; CHECK-NEXT: [[TMP5:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 32, i8* [[TMP4]], i64 32)
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 32, i8* [[BUF:%.*]], i64 1024, x86_amx [[TMP5]])
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br i1 undef, label %l1, label %l2

l1:
  %t1 = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 32)
  %t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
  br i1 undef, label %l2, label %exit

l2:
  ; merges a zero vector with the tile produced in %l1
  %t3 = phi <256 x i32> [ zeroinitializer, %entry ], [ %t2, %l1 ]
  %t4 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %t3)
  call void @llvm.x86.tilestored64.internal(i16 8, i16 32, i8* %buf, i64 1024, x86_amx %t4)
  br label %exit

exit:
  ret void
}
|
||
; foo_vrow: like @foo_undef but the tile row count is the runtime
; argument %row rather than a constant. CHECK lines show %row threaded
; through tilezero/tilestored64/tileloadd64 in the lowered code.
define void @foo_vrow(i8 *%buf, i16 %row) {
; CHECK-LABEL: @foo_vrow(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca <256 x i32>, align 64
; CHECK-NEXT: [[TMP1:%.*]] = alloca <256 x i32>, align 64
; CHECK-NEXT: br i1 undef, label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[T1:%.*]] = call x86_amx @llvm.x86.tilezero.internal(i16 [[ROW:%.*]], i16 32)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i32>* [[TMP1]] to i8*
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 32, i8* [[TMP2]], i64 32, x86_amx [[T1]])
; CHECK-NEXT: [[TMP3:%.*]] = load <256 x i32>, <256 x i32>* [[TMP1]], align 1024
; CHECK-NEXT: br i1 undef, label [[L2]], label [[EXIT:%.*]]
; CHECK: l2:
; CHECK-NEXT: [[T3:%.*]] = phi <256 x i32> [ undef, [[ENTRY:%.*]] ], [ [[TMP3]], [[L1]] ]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <256 x i32>* [[TMP0]] to i8*
; CHECK-NEXT: store <256 x i32> [[T3]], <256 x i32>* [[TMP0]], align 1024
; CHECK-NEXT: [[TMP5:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 32, i8* [[TMP4]], i64 32)
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 32, i8* [[BUF:%.*]], i64 1024, x86_amx [[TMP5]])
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br i1 undef, label %l1, label %l2

l1:
  ; row is not a compile-time constant here
  %t1 = call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 32)
  %t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
  br i1 undef, label %l2, label %exit

l2:
  %t3 = phi <256 x i32> [ undef, %entry ], [ %t2, %l1 ]
  %t4 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %t3)
  call void @llvm.x86.tilestored64.internal(i16 %row, i16 32, i8* %buf, i64 1024, x86_amx %t4)
  br label %exit

exit:
  ret void
}
|
||
; foo_vcol: like @foo_zero but the tile column count is the runtime
; argument %col. CHECK lines show the lowered store/load stride being
; computed as 'sext i16 %col to i64' instead of a constant.
define void @foo_vcol(i8 *%buf, i16 %col) {
; CHECK-LABEL: @foo_vcol(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca <256 x i32>, align 64
; CHECK-NEXT: [[TMP1:%.*]] = alloca <256 x i32>, align 64
; CHECK-NEXT: br i1 undef, label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[T1:%.*]] = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 [[COL:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <256 x i32>* [[TMP1]] to i8*
; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[COL]] to i64
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 [[COL]], i8* [[TMP2]], i64 [[TMP3]], x86_amx [[T1]])
; CHECK-NEXT: [[TMP4:%.*]] = load <256 x i32>, <256 x i32>* [[TMP1]], align 1024
; CHECK-NEXT: br i1 undef, label [[L2]], label [[EXIT:%.*]]
; CHECK: l2:
; CHECK-NEXT: [[T3:%.*]] = phi <256 x i32> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP4]], [[L1]] ]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <256 x i32>* [[TMP0]] to i8*
; CHECK-NEXT: store <256 x i32> [[T3]], <256 x i32>* [[TMP0]], align 1024
; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[COL]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], i8* [[TMP5]], i64 [[TMP6]])
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 [[COL]], i8* [[BUF:%.*]], i64 1024, x86_amx [[TMP7]])
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br i1 undef, label %l1, label %l2

l1:
  ; column count is not a compile-time constant here
  %t1 = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 %col)
  %t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
  br i1 undef, label %l2, label %exit

l2:
  %t3 = phi <256 x i32> [ zeroinitializer, %entry ], [ %t2, %l1 ]
  %t4 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %t3)
  call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, i8* %buf, i64 1024, x86_amx %t4)
  br label %exit

exit:
  ret void
}
|
||
; foo_noshape: the phi result is cast vector->tile->vector and then
; stored as a plain <256 x i32>, so no AMX intrinsic consumes the tile.
; CHECK lines show the cast round-trip folding away: the final store
; remains an ordinary vector store through a bitcast of %buf.
define void @foo_noshape(i8 *%buf) {
; CHECK-LABEL: @foo_noshape(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = alloca <256 x i32>, align 64
; CHECK-NEXT: br i1 undef, label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[T1:%.*]] = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 32)
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <256 x i32>* [[TMP0]] to i8*
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 32, i8* [[TMP1]], i64 32, x86_amx [[T1]])
; CHECK-NEXT: [[TMP2:%.*]] = load <256 x i32>, <256 x i32>* [[TMP0]], align 1024
; CHECK-NEXT: br i1 undef, label [[L2]], label [[EXIT:%.*]]
; CHECK: l2:
; CHECK-NEXT: [[T3:%.*]] = phi <256 x i32> [ undef, [[ENTRY:%.*]] ], [ [[TMP2]], [[L1]] ]
; CHECK-NEXT: [[P:%.*]] = bitcast i8* [[BUF:%.*]] to <256 x i32>*
; CHECK-NEXT: store <256 x i32> [[T3]], <256 x i32>* [[P]], align 1024
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br i1 undef, label %l1, label %l2

l1:
  %t1 = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 32)
  %t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
  br i1 undef, label %l2, label %exit

l2:
  ; vector -> tile -> vector round trip; the tile has no AMX user
  %t3 = phi <256 x i32> [ undef, %entry ], [ %t2, %l1 ]
  %t4 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %t3)
  %t5 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t4)
  %p = bitcast i8* %buf to <256 x i32>*
  store <256 x i32> %t5, <256 x i32>* %p
  br label %exit

exit:
  ret void
}
|
||
; Declarations of the AMX intrinsics used above.
; NOTE(review): @llvm.x86.tdpbssd.internal is declared but not called by
; any function in this chunk — presumably kept for follow-up tests.
declare x86_amx @llvm.x86.tilezero.internal(i16, i16)
declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx)
declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>)
declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)