40 changes: 20 additions & 20 deletions mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,43 @@

// CMP32-LABEL: llvm.func @genbool_var_1d(
// CMP32-SAME: %[[A:.*]]: i64)
// CMP32: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>) : !llvm.vec<11 x i32>
// CMP32: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>) : vector<11xi32>
// CMP32: %[[T1:.*]] = llvm.trunc %[[A]] : i64 to i32
// CMP32: %[[T2:.*]] = llvm.mlir.undef : !llvm.vec<11 x i32>
// CMP32: %[[T2:.*]] = llvm.mlir.undef : vector<11xi32>
// CMP32: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : i32
// CMP32: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%[[T3]] : i32] : !llvm.vec<11 x i32>
// CMP32: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<11 x i32>, !llvm.vec<11 x i32>
// CMP32: %[[T6:.*]] = llvm.icmp "slt" %[[T0]], %[[T5]] : !llvm.vec<11 x i32>
// CMP32: llvm.return %[[T6]] : !llvm.vec<11 x i1>
// CMP32: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%[[T3]] : i32] : vector<11xi32>
// CMP32: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi32>, vector<11xi32>
// CMP32: %[[T6:.*]] = llvm.icmp "slt" %[[T0]], %[[T5]] : vector<11xi32>
// CMP32: llvm.return %[[T6]] : vector<11xi1>

// CMP64-LABEL: llvm.func @genbool_var_1d(
// CMP64-SAME: %[[A:.*]]: i64)
// CMP64: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>) : !llvm.vec<11 x i64>
// CMP64: %[[T1:.*]] = llvm.mlir.undef : !llvm.vec<11 x i64>
// CMP64: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>) : vector<11xi64>
// CMP64: %[[T1:.*]] = llvm.mlir.undef : vector<11xi64>
// CMP64: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32
// CMP64: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : !llvm.vec<11 x i64>
// CMP64: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T1]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<11 x i64>, !llvm.vec<11 x i64>
// CMP64: %[[T5:.*]] = llvm.icmp "slt" %[[T0]], %[[T4]] : !llvm.vec<11 x i64>
// CMP64: llvm.return %[[T5]] : !llvm.vec<11 x i1>
// CMP64: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<11xi64>
// CMP64: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T1]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi64>, vector<11xi64>
// CMP64: %[[T5:.*]] = llvm.icmp "slt" %[[T0]], %[[T4]] : vector<11xi64>
// CMP64: llvm.return %[[T5]] : vector<11xi1>

// Test input: vector.create_mask builds an 11-lane i1 mask from the runtime
// index bound %arg0. The FileCheck expectations above describe the expected
// lowering: the bound is splatted and compared ("slt") against the constant
// lane indices [0..10], using i32 or i64 lanes depending on the run prefix.
func @genbool_var_1d(%arg0: index) -> vector<11xi1> {
%0 = vector.create_mask %arg0 : vector<11xi1>
return %0 : vector<11xi1>
}

// CMP32-LABEL: llvm.func @transfer_read_1d
// CMP32: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>) : !llvm.vec<16 x i32>
// CMP32: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : !llvm.vec<16 x i32>
// CMP32: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : !llvm.vec<16 x i32>
// CMP32: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>) : vector<16xi32>
// CMP32: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi32>
// CMP32: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : vector<16xi32>
// CMP32: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}}
// CMP32: llvm.return %[[L]] : !llvm.vec<16 x f32>
// CMP32: llvm.return %[[L]] : vector<16xf32>

// CMP64-LABEL: llvm.func @transfer_read_1d
// CMP64: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>) : !llvm.vec<16 x i64>
// CMP64: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : !llvm.vec<16 x i64>
// CMP64: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : !llvm.vec<16 x i64>
// CMP64: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>) : vector<16xi64>
// CMP64: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi64>
// CMP64: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : vector<16xi64>
// CMP64: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}}
// CMP64: llvm.return %[[L]] : !llvm.vec<16 x f32>
// CMP64: llvm.return %[[L]] : vector<16xf32>

func @transfer_read_1d(%A : memref<?xf32>, %i: index) -> vector<16xf32> {
%d = constant -1.0: f32
Expand Down
16 changes: 8 additions & 8 deletions mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@

//
// CHECK-LABEL: llvm.func @reduce_add_f32(
// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x f32>)
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: {reassoc = false} : (f32, !llvm.vec<16 x f32>) -> f32
// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32
// CHECK: llvm.return %[[V]] : f32
//
// REASSOC-LABEL: llvm.func @reduce_add_f32(
// REASSOC-SAME: %[[A:.*]]: !llvm.vec<16 x f32>)
// REASSOC-SAME: %[[A:.*]]: vector<16xf32>)
// REASSOC: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// REASSOC-SAME: {reassoc = true} : (f32, !llvm.vec<16 x f32>) -> f32
// REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32
// REASSOC: llvm.return %[[V]] : f32
//
func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {
Expand All @@ -23,17 +23,17 @@ func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {

//
// CHECK-LABEL: llvm.func @reduce_mul_f32(
// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x f32>)
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
// CHECK-SAME: {reassoc = false} : (f32, !llvm.vec<16 x f32>) -> f32
// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32
// CHECK: llvm.return %[[V]] : f32
//
// REASSOC-LABEL: llvm.func @reduce_mul_f32(
// REASSOC-SAME: %[[A:.*]]: !llvm.vec<16 x f32>)
// REASSOC-SAME: %[[A:.*]]: vector<16xf32>)
// REASSOC: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
// REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
// REASSOC-SAME: {reassoc = true} : (f32, !llvm.vec<16 x f32>) -> f32
// REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32
// REASSOC: llvm.return %[[V]] : f32
//
func @reduce_mul_f32(%arg0: vector<16xf32>) -> f32 {
Expand Down
726 changes: 363 additions & 363 deletions mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
return %f: vector<2xf32>
}
// CHECK-LABEL: @transfer_readx2
// CHECK: rocdl.buffer.load {{.*}} !llvm.vec<2 x f32>
// CHECK: rocdl.buffer.load {{.*}} vector<2xf32>

func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
%f0 = constant 0.0: f32
Expand All @@ -19,7 +19,7 @@ func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
return %f: vector<4xf32>
}
// CHECK-LABEL: @transfer_readx4
// CHECK: rocdl.buffer.load {{.*}} !llvm.vec<4 x f32>
// CHECK: rocdl.buffer.load {{.*}} vector<4xf32>

func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
%f0 = constant 0.0: f32
Expand All @@ -43,7 +43,7 @@ func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
return
}
// CHECK-LABEL: @transfer_writex2
// CHECK: rocdl.buffer.store {{.*}} !llvm.vec<2 x f32>
// CHECK: rocdl.buffer.store {{.*}} vector<2xf32>

func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
vector.transfer_write %B, %A[%base]
Expand All @@ -52,7 +52,7 @@ func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
return
}
// CHECK-LABEL: @transfer_writex4
// CHECK: rocdl.buffer.store {{.*}} !llvm.vec<4 x f32>
// CHECK: rocdl.buffer.store {{.*}} vector<4xf32>

func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
vector.transfer_write %B, %A[%base]
Expand Down
17 changes: 1 addition & 16 deletions mlir/test/Dialect/LLVMIR/dialect-cast.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ func @mlir_dialect_cast(%0: index, %1: i32, %2: bf16, %3: f16, %4: f32, %5: f64,
%10: memref<*xf32>) {
llvm.mlir.cast %0 : index to i64
llvm.mlir.cast %0 : index to i32
llvm.mlir.cast %6 : vector<42xf32> to !llvm.vec<42xf32>
llvm.mlir.cast %7 : memref<42xf32> to !llvm.ptr<f32>
llvm.mlir.cast %7 : memref<42xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1xi64>, array<1xi64>)>
llvm.mlir.cast %8 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1xi64>, array<1xi64>)>
Expand Down Expand Up @@ -72,27 +71,13 @@ func @mlir_dialect_cast_integer_non_integer(%0 : i16) {

// -----

func @mlir_dialect_cast_nd_vector(%0 : vector<2x2xf32>) {
// expected-error@+1 {{only 1-d vector is allowed}}
llvm.mlir.cast %0 : vector<2x2xf32> to !llvm.vec<4xf32>
}

// -----

func @mlir_dialect_cast_scalable_vector(%0 : vector<2xf32>) {
// expected-error@+1 {{only fixed-sized vector is allowed}}
// expected-error@+1 {{vector types should not be casted}}
llvm.mlir.cast %0 : vector<2xf32> to !llvm.vec<?x2xf32>
}

// -----

func @mlir_dialect_cast_vector_size_mismatch(%0 : vector<2xf32>) {
// expected-error@+1 {{invalid cast between vectors with mismatching sizes}}
llvm.mlir.cast %0 : vector<2xf32> to !llvm.vec<4xf32>
}

// -----

func @mlir_dialect_cast_dynamic_memref_bare_ptr(%0 : memref<?xf32>) {
// expected-error@+1 {{unexpected bare pointer for dynamically shaped memref}}
llvm.mlir.cast %0 : memref<?xf32> to !llvm.ptr<f32>
Expand Down
62 changes: 31 additions & 31 deletions mlir/test/Dialect/LLVMIR/invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -317,21 +317,21 @@ func @extractvalue_wrong_nesting() {

// -----

func @invalid_vector_type_1(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) {
// expected-error@+1 {{expected LLVM IR dialect vector type for operand #1}}
// Negative test: llvm.extractelement is applied to %arg2, which is a scalar
// f32 rather than a vector, so the operand-type diagnostic annotated below
// must be emitted. %arg0 (the only real vector argument) is intentionally
// unused by the op.
func @invalid_vector_type_1(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) {
// expected-error@+1 {{expected LLVM dialect-compatible vector type for operand #1}}
%0 = llvm.extractelement %arg2[%arg1 : i32] : f32
}

// -----

func @invalid_vector_type_2(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) {
// expected-error@+1 {{expected LLVM IR dialect vector type for operand #1}}
// Negative test: llvm.insertelement uses the scalar f32 %arg2 both as the
// inserted value and as the container, so the container is not a vector and
// the operand-type diagnostic annotated below must be emitted.
func @invalid_vector_type_2(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) {
// expected-error@+1 {{expected LLVM dialect-compatible vector type for operand #1}}
%0 = llvm.insertelement %arg2, %arg2[%arg1 : i32] : f32
}

// -----

func @invalid_vector_type_3(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) {
// Negative test: llvm.shufflevector is given two scalar f32 operands (%arg2)
// instead of vectors, so the operand-type diagnostic annotated below must be
// emitted.
// NOTE(review): the diagnostic here still says "LLVM IR dialect vector type",
// while the extractelement/insertelement tests in this file expect
// "LLVM dialect-compatible vector type" -- confirm the shufflevector parser
// message was intentionally left unchanged.
func @invalid_vector_type_3(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) {
// expected-error@+1 {{expected LLVM IR dialect vector type for operand #1}}
%0 = llvm.shufflevector %arg2, %arg2 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : f32, f32
}
Expand Down Expand Up @@ -366,74 +366,74 @@ func @nvvm_invalid_shfl_pred_3(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i3

// -----

func @nvvm_invalid_mma_0(%a0 : f16, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
func @nvvm_invalid_mma_0(%a0 : f16, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
%c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
// expected-error@+1 {{expected operands to be 4 <halfx2>s followed by either 4 <halfx2>s or 8 floats}}
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (f16, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (f16, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
}

// -----

func @nvvm_invalid_mma_1(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
func @nvvm_invalid_mma_1(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
%c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
// expected-error@+1 {{expected result type to be a struct of either 4 <halfx2>s or 8 floats}}
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f16)>
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f16)>
llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f16)>
}

// -----

func @nvvm_invalid_mma_2(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
func @nvvm_invalid_mma_2(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
%c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
// expected-error@+1 {{alayout and blayout attributes must be set to either "row" or "col"}}
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
}

// -----

func @nvvm_invalid_mma_3(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
%c0 : !llvm.vec<2 x f16>, %c1 : !llvm.vec<2 x f16>,
%c2 : !llvm.vec<2 x f16>, %c3 : !llvm.vec<2 x f16>) {
func @nvvm_invalid_mma_3(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : vector<2xf16>, %c1 : vector<2xf16>,
%c2 : vector<2xf16>, %c3 : vector<2xf16>) {
// expected-error@+1 {{unimplemented mma.sync variant}}
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
}

// -----

func @nvvm_invalid_mma_4(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
func @nvvm_invalid_mma_4(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
%c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
// expected-error@+1 {{unimplemented mma.sync variant}}
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(vec<2 x f16>, vec<2 x f16>, vec<2 x f16>, vec<2 x f16>)>
llvm.return %0 : !llvm.struct<(vec<2 x f16>, vec<2 x f16>, vec<2 x f16>, vec<2 x f16>)>
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
llvm.return %0 : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
}

// -----

func @nvvm_invalid_mma_5(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
func @nvvm_invalid_mma_5(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
%c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
// expected-error@+1 {{unimplemented mma.sync variant}}
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
}

// -----

func @nvvm_invalid_mma_6(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
func @nvvm_invalid_mma_6(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
%c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
// expected-error@+1 {{invalid kind of type specified}}
Expand All @@ -443,12 +443,12 @@ func @nvvm_invalid_mma_6(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,

// -----

func @nvvm_invalid_mma_7(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
func @nvvm_invalid_mma_7(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
%c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
// expected-error@+1 {{op requires one result}}
%0:2 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> (!llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>, i32)
%0:2 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> (!llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>, i32)
llvm.return %0#0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
}

Expand Down
8 changes: 4 additions & 4 deletions mlir/test/Dialect/LLVMIR/nvvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ func @nvvm_vote(%arg0 : i32, %arg1 : i1) -> i32 {
llvm.return %0 : i32
}

func @nvvm_mma(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
func @nvvm_mma(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
%c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
// CHECK: nvvm.mma.sync {{.*}} {alayout = "row", blayout = "col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
// CHECK: nvvm.mma.sync {{.*}} {alayout = "row", blayout = "col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
}
164 changes: 82 additions & 82 deletions mlir/test/Dialect/LLVMIR/rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -36,133 +36,133 @@ func @rocdl.barrier() {
}

func @rocdl.xdlops(%arg0 : f32, %arg1 : f32,
%arg2 : !llvm.vec<32 x f32>, %arg3 : i32,
%arg4 : !llvm.vec<16 x f32>, %arg5 : !llvm.vec<4 x f32>,
%arg6 : !llvm.vec<4 x f16>, %arg7 : !llvm.vec<32 x i32>,
%arg8 : !llvm.vec<16 x i32>, %arg9 : !llvm.vec<4 x i32>,
%arg10 : !llvm.vec<2 x i16>) -> !llvm.vec<32 x f32> {
%arg2 : vector<32xf32>, %arg3 : i32,
%arg4 : vector<16xf32>, %arg5 : vector<4xf32>,
%arg6 : vector<4xf16>, %arg7 : vector<32xi32>,
%arg8 : vector<16xi32>, %arg9 : vector<4xi32>,
%arg10 : vector<2xi16>) -> vector<32xf32> {
// CHECK-LABEL: rocdl.xdlops
// CHECK: rocdl.mfma.f32.32x32x1f32 {{.*}} : (f32, f32, !llvm.vec<32 x f32>, i32, i32, i32) -> !llvm.vec<32 x f32>
// CHECK: rocdl.mfma.f32.32x32x1f32 {{.*}} : (f32, f32, vector<32xf32>, i32, i32, i32) -> vector<32xf32>
%r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<32 x f32>,
i32, i32, i32) -> !llvm.vec<32 x f32>
(f32, f32, vector<32xf32>,
i32, i32, i32) -> vector<32xf32>

// CHECK: rocdl.mfma.f32.16x16x1f32 {{.*}} : (f32, f32, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32>
// CHECK: rocdl.mfma.f32.16x16x1f32 {{.*}} : (f32, f32, vector<16xf32>, i32, i32, i32) -> vector<16xf32>
%r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(f32, f32, vector<16xf32>,
i32, i32, i32) -> vector<16xf32>

// CHECK: rocdl.mfma.f32.16x16x4f32 {{.*}} : (f32, f32, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32>
// CHECK: rocdl.mfma.f32.16x16x4f32 {{.*}} : (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32>
%r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(f32, f32, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: rocdl.mfma.f32.4x4x1f32 {{.*}} : (f32, f32, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32>
// CHECK: rocdl.mfma.f32.4x4x1f32 {{.*}} : (f32, f32, vector<4xf32>, i32, i32, i32) -> vector<4xf32>
%r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(f32, f32, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: rocdl.mfma.f32.32x32x2f32 {{.*}} : (f32, f32, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32>
// CHECK: rocdl.mfma.f32.32x32x2f32 {{.*}} : (f32, f32, vector<16xf32>, i32, i32, i32) -> vector<16xf32>
%r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(f32, f32, vector<16xf32>,
i32, i32, i32) -> vector<16xf32>

// CHECK: rocdl.mfma.f32.32x32x4f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<32 x f32>, i32, i32, i32) -> !llvm.vec<32 x f32>
// CHECK: rocdl.mfma.f32.32x32x4f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32>
%r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<32 x f32>,
i32, i32, i32) -> !llvm.vec<32 x f32>
(vector<4xf16>, vector<4xf16>, vector<32xf32>,
i32, i32, i32) -> vector<32xf32>

// CHECK: rocdl.mfma.f32.16x16x4f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32>
// CHECK: rocdl.mfma.f32.16x16x4f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32>
%r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(vector<4xf16>, vector<4xf16>, vector<16xf32>,
i32, i32, i32) -> vector<16xf32>

// CHECK: rocdl.mfma.f32.4x4x4f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32>
// CHECK: rocdl.mfma.f32.4x4x4f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32>
%r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(vector<4xf16>, vector<4xf16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: rocdl.mfma.f32.32x32x8f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32>
// CHECK: rocdl.mfma.f32.32x32x8f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32>
%r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(vector<4xf16>, vector<4xf16>, vector<16xf32>,
i32, i32, i32) -> vector<16xf32>

// CHECK: rocdl.mfma.f32.16x16x16f16 {{.*}} : (!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32>
// CHECK: rocdl.mfma.f32.16x16x16f16 {{.*}} : (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32>
%r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(vector<4xf16>, vector<4xf16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: rocdl.mfma.i32.32x32x4i8 {{.*}} : (i32, i32, !llvm.vec<32 x i32>, i32, i32, i32) -> !llvm.vec<32 x i32>
// CHECK: rocdl.mfma.i32.32x32x4i8 {{.*}} : (i32, i32, vector<32xi32>, i32, i32, i32) -> vector<32xi32>
%r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<32 x i32>,
i32, i32, i32) -> !llvm.vec<32 x i32>
(i32, i32, vector<32xi32>,
i32, i32, i32) -> vector<32xi32>

// CHECK: rocdl.mfma.i32.16x16x4i8 {{.*}} : (i32, i32, !llvm.vec<16 x i32>, i32, i32, i32) -> !llvm.vec<16 x i32>
// CHECK: rocdl.mfma.i32.16x16x4i8 {{.*}} : (i32, i32, vector<16xi32>, i32, i32, i32) -> vector<16xi32>
%r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<16 x i32>,
i32, i32, i32) -> !llvm.vec<16 x i32>
(i32, i32, vector<16xi32>,
i32, i32, i32) -> vector<16xi32>

// CHECK: rocdl.mfma.i32.4x4x4i8 {{.*}} : (i32, i32, !llvm.vec<4 x i32>, i32, i32, i32) -> !llvm.vec<4 x i32>
// CHECK: rocdl.mfma.i32.4x4x4i8 {{.*}} : (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32>
%r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<4 x i32>,
i32, i32, i32) -> !llvm.vec<4 x i32>
(i32, i32, vector<4xi32>,
i32, i32, i32) -> vector<4xi32>

// CHECK: rocdl.mfma.i32.32x32x8i8 {{.*}} : (i32, i32, !llvm.vec<16 x i32>, i32, i32, i32) -> !llvm.vec<16 x i32>
// CHECK: rocdl.mfma.i32.32x32x8i8 {{.*}} : (i32, i32, vector<16xi32>, i32, i32, i32) -> vector<16xi32>
%r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<16 x i32>,
i32, i32, i32) -> !llvm.vec<16 x i32>
(i32, i32, vector<16xi32>,
i32, i32, i32) -> vector<16xi32>

// CHECK: rocdl.mfma.i32.16x16x16i8 {{.*}} : (i32, i32, !llvm.vec<4 x i32>, i32, i32, i32) -> !llvm.vec<4 x i32>
// CHECK: rocdl.mfma.i32.16x16x16i8 {{.*}} : (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32>
%r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<4 x i32>,
i32, i32, i32) -> !llvm.vec<4 x i32>
(i32, i32, vector<4xi32>,
i32, i32, i32) -> vector<4xi32>

// CHECK: rocdl.mfma.f32.32x32x2bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<32 x f32>, i32, i32, i32) -> !llvm.vec<32 x f32>
// CHECK: rocdl.mfma.f32.32x32x2bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32>
%r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<32 x f32>,
i32, i32, i32) -> !llvm.vec<32 x f32>
(vector<2xi16>, vector<2xi16>, vector<32xf32>,
i32, i32, i32) -> vector<32xf32>

// CHECK: rocdl.mfma.f32.16x16x2bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32>
// CHECK: rocdl.mfma.f32.16x16x2bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32>
%r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(vector<2xi16>, vector<2xi16>, vector<16xf32>,
i32, i32, i32) -> vector<16xf32>

// CHECK: rocdl.mfma.f32.4x4x2bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32>
// CHECK: rocdl.mfma.f32.4x4x2bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32>
%r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(vector<2xi16>, vector<2xi16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: rocdl.mfma.f32.32x32x4bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>, i32, i32, i32) -> !llvm.vec<16 x f32>
// CHECK: rocdl.mfma.f32.32x32x4bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32>
%r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(vector<2xi16>, vector<2xi16>, vector<16xf32>,
i32, i32, i32) -> vector<16xf32>

// CHECK: rocdl.mfma.f32.16x16x8bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>, i32, i32, i32) -> !llvm.vec<4 x f32>
// CHECK: rocdl.mfma.f32.16x16x8bf16 {{.*}} : (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32>
%r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(vector<2xi16>, vector<2xi16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

llvm.return %r0 : !llvm.vec<32 x f32>
llvm.return %r0 : vector<32xf32>
}

// Exercises rocdl.buffer.load and rocdl.buffer.store on 1-, 2- and 4-element
// f32 vectors; every op is preceded by the FileCheck pattern meant to match it.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
llvm.func @rocdl.mubuf(%rsrc : !llvm.vec<4 x i32>, %vindex : i32,
llvm.func @rocdl.mubuf(%rsrc : vector<4xi32>, %vindex : i32,
%offset : i32, %glc : i1,
%slc : i1, %vdata1 : !llvm.vec<1 x f32>,
%vdata2 : !llvm.vec<2 x f32>, %vdata4 : !llvm.vec<4 x f32>) {
%slc : i1, %vdata1 : vector<1xf32>,
%vdata2 : vector<2xf32>, %vdata4 : vector<4xf32>) {
// CHECK-LABEL: rocdl.mubuf
// CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<1 x f32>
%r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x f32>
// CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<2 x f32>
%r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x f32>
// CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<4 x f32>
%r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x f32>

// CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<1 x f32>
rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x f32>
// CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<2 x f32>
rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x f32>
// CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<4 x f32>
rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x f32>
// CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<1xf32>
%r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32>
// CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<2xf32>
%r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32>
// CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<4xf32>
%r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32>

// CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<1xf32>
rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32>
// CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<2xf32>
rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32>
// CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<4xf32>
rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32>

llvm.return
}
Expand Down
36 changes: 18 additions & 18 deletions mlir/test/Dialect/LLVMIR/roundtrip.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -223,21 +223,21 @@ llvm.func @foo(%arg0: i32) -> !llvm.struct<(i32, f64, i32)> {
}

// CHECK-LABEL: @casts
// CHECK-SAME: (%[[I32:.*]]: i32, %[[I64:.*]]: i64, %[[V4I32:.*]]: !llvm.vec<4 x i32>, %[[V4I64:.*]]: !llvm.vec<4 x i64>, %[[I32PTR:.*]]: !llvm.ptr<i32>)
func @casts(%arg0: i32, %arg1: i64, %arg2: !llvm.vec<4 x i32>,
%arg3: !llvm.vec<4 x i64>, %arg4: !llvm.ptr<i32>) {
// CHECK-SAME: (%[[I32:.*]]: i32, %[[I64:.*]]: i64, %[[V4I32:.*]]: vector<4xi32>, %[[V4I64:.*]]: vector<4xi64>, %[[I32PTR:.*]]: !llvm.ptr<i32>)
func @casts(%arg0: i32, %arg1: i64, %arg2: vector<4xi32>,
%arg3: vector<4xi64>, %arg4: !llvm.ptr<i32>) {
// CHECK: = llvm.sext %[[I32]] : i32 to i56
%0 = llvm.sext %arg0 : i32 to i56
// CHECK: = llvm.zext %[[I32]] : i32 to i64
%1 = llvm.zext %arg0 : i32 to i64
// CHECK: = llvm.trunc %[[I64]] : i64 to i56
%2 = llvm.trunc %arg1 : i64 to i56
// CHECK: = llvm.sext %[[V4I32]] : !llvm.vec<4 x i32> to !llvm.vec<4 x i56>
%3 = llvm.sext %arg2 : !llvm.vec<4 x i32> to !llvm.vec<4 x i56>
// CHECK: = llvm.zext %[[V4I32]] : !llvm.vec<4 x i32> to !llvm.vec<4 x i64>
%4 = llvm.zext %arg2 : !llvm.vec<4 x i32> to !llvm.vec<4 x i64>
// CHECK: = llvm.trunc %[[V4I64]] : !llvm.vec<4 x i64> to !llvm.vec<4 x i56>
%5 = llvm.trunc %arg3 : !llvm.vec<4 x i64> to !llvm.vec<4 x i56>
// CHECK: = llvm.sext %[[V4I32]] : vector<4xi32> to vector<4xi56>
%3 = llvm.sext %arg2 : vector<4xi32> to vector<4xi56>
// CHECK: = llvm.zext %[[V4I32]] : vector<4xi32> to vector<4xi64>
%4 = llvm.zext %arg2 : vector<4xi32> to vector<4xi64>
// CHECK: = llvm.trunc %[[V4I64]] : vector<4xi64> to vector<4xi56>
%5 = llvm.trunc %arg3 : vector<4xi64> to vector<4xi56>
// CHECK: = llvm.sitofp %[[I32]] : i32 to f32
%6 = llvm.sitofp %arg0 : i32 to f32
// CHECK: %[[FLOAT:.*]] = llvm.uitofp %[[I32]] : i32 to f32
Expand All @@ -252,15 +252,15 @@ func @casts(%arg0: i32, %arg1: i64, %arg2: !llvm.vec<4 x i32>,
}

// Covers the vector ops extractelement, insertelement, shufflevector and a
// dense splat constant on 4-element f32 vectors; each op is preceded by the
// pattern its printed form is expected to match.
// (Diff view: each old `!llvm.vec` line group is followed by its `vector` replacement.)
// CHECK-LABEL: @vect
func @vect(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) {
// CHECK: = llvm.extractelement {{.*}} : !llvm.vec<4 x f32>
%0 = llvm.extractelement %arg0[%arg1 : i32] : !llvm.vec<4 x f32>
// CHECK: = llvm.insertelement {{.*}} : !llvm.vec<4 x f32>
%1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : !llvm.vec<4 x f32>
// CHECK: = llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm.vec<4 x f32>, !llvm.vec<4 x f32>
%2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm.vec<4 x f32>, !llvm.vec<4 x f32>
// CHECK: = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm.vec<4 x f32>
%3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : !llvm.vec<4 x f32>
func @vect(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) {
// CHECK: = llvm.extractelement {{.*}} : vector<4xf32>
%0 = llvm.extractelement %arg0[%arg1 : i32] : vector<4xf32>
// CHECK: = llvm.insertelement {{.*}} : vector<4xf32>
%1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : vector<4xf32>
// The shuffle mask has five entries although both inputs have four elements.
// CHECK: = llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : vector<4xf32>, vector<4xf32>
%2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : vector<4xf32>, vector<4xf32>
// The input writes the splat as 1.0; the pattern expects it printed as 1.000000e+00.
// CHECK: = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32>
%3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : vector<4xf32>
return
}

Expand Down
12 changes: 6 additions & 6 deletions mlir/test/Dialect/LLVMIR/types-invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -113,42 +113,42 @@ func @identified_struct_with_void() {

// Negative test: an LLVM dialect vector type whose only dimension is `?` is
// expected to be rejected with the parser diagnostic below.
// (Diff view: the first "some.op" line is the old text, the second its
// replacement, which uses a pointer element type.)
func @dynamic_vector() {
// expected-error @+1 {{expected '? x <integer> x <type>' or '<integer> x <type>'}}
"some.op"() : () -> !llvm.vec<? x f32>
"some.op"() : () -> !llvm.vec<? x ptr<f32>>
}

// -----

// Negative test: a vector type with two `?` dimensions is expected to be
// rejected with the parser diagnostic below.
// (Diff view: the first "some.op" line is the old text, the second its replacement.)
func @dynamic_scalable_vector() {
// expected-error @+1 {{expected '? x <integer> x <type>' or '<integer> x <type>'}}
"some.op"() : () -> !llvm.vec<? x ? x f32>
"some.op"() : () -> !llvm.vec<?x? x ptr<f32>>
}

// -----

// Negative test: a vector type with two fixed dimensions (4 x 4) is expected
// to be rejected with the parser diagnostic below.
// (Diff view: the first "some.op" line is the old text, the second its replacement.)
func @unscalable_vector() {
// expected-error @+1 {{expected '? x <integer> x <type>' or '<integer> x <type>'}}
"some.op"() : () -> !llvm.vec<4 x 4 x i32>
"some.op"() : () -> !llvm.vec<4x4 x ptr<i32>>
}

// -----

// Negative test: a vector type with zero elements is expected to be rejected
// with the "must be positive" diagnostic below.
// (Diff view: the first "some.op" line is the old text, the second its replacement.)
func @zero_vector() {
// expected-error @+1 {{the number of vector elements must be positive}}
"some.op"() : () -> !llvm.vec<0 x i32>
"some.op"() : () -> !llvm.vec<0 x ptr<i32>>
}

// -----

// Negative test: a vector used as the element type of an LLVM dialect vector
// is expected to be rejected as an invalid element type.
// (Diff view: the old line nests `vec<2 x i32>`, the replacement nests the
// builtin `vector<2xi32>`.)
func @nested_vector() {
// expected-error @+1 {{invalid vector element type}}
"some.op"() : () -> !llvm.vec<2 x vec<2 x i32>>
"some.op"() : () -> !llvm.vec<2 x vector<2xi32>>
}

// -----

// Negative test: a scalable vector with `void` elements is expected to be
// rejected as an invalid element type.
// (Diff view: the replacement line only changes the spacing of `? x 4`.)
func @scalable_void_vector() {
// expected-error @+1 {{invalid vector element type}}
"some.op"() : () -> !llvm.vec<? x 4 x void>
"some.op"() : () -> !llvm.vec<?x4 x void>
}

// -----
Expand Down
8 changes: 4 additions & 4 deletions mlir/test/Dialect/LLVMIR/types.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ func @ptr() {

// CHECK-LABEL: @vec
func @vec() {
// CHECK: !llvm.vec<4 x i32>
"some.op"() : () -> !llvm.vec<4 x i32>
// CHECK: !llvm.vec<4 x f32>
"some.op"() : () -> !llvm.vec<4 x f32>
// CHECK: vector<4xi32>
"some.op"() : () -> vector<4xi32>
// CHECK: vector<4xf32>
"some.op"() : () -> vector<4xf32>
// CHECK: !llvm.vec<? x 4 x i32>
"some.op"() : () -> !llvm.vec<? x 4 x i32>
// CHECK: !llvm.vec<? x 8 x f16>
Expand Down
22 changes: 11 additions & 11 deletions mlir/test/Target/arm-neon.mlir
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
// RUN: mlir-opt -verify-diagnostics %s | mlir-opt | mlir-translate -arm-neon-mlir-to-llvmir | FileCheck %s

// Chains three llvm_arm_neon.smull ops (8xi8 -> 8xi16, 4xi16 -> 4xi32,
// 2xi32 -> 2xi64), narrowing between them with llvm.shufflevector, and returns
// all three results packed into one struct. The patterns match the LLVM IR
// calls to the @llvm.aarch64.neon.smull.* intrinsics.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
// CHECK-LABEL: arm_neon_smull
llvm.func @arm_neon_smull(%arg0: !llvm.vec<8 x i8>, %arg1: !llvm.vec<8 x i8>) -> !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)> {
llvm.func @arm_neon_smull(%arg0: vector<8xi8>, %arg1: vector<8xi8>) -> !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)> {
// CHECK: %[[V0:.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %{{.*}}, <8 x i8> %{{.*}})
// NOTE(review): the next pattern names the first shufflevector operand
// literally as %3 instead of reusing the V0 capture -- confirm this is intended.
// CHECK-NEXT: %[[V00:.*]] = shufflevector <8 x i16> %3, <8 x i16> %[[V0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
%0 = "llvm_arm_neon.smull"(%arg0, %arg1) : (!llvm.vec<8 x i8>, !llvm.vec<8 x i8>) -> !llvm.vec<8 x i16>
%1 = llvm.shufflevector %0, %0 [3, 4, 5, 6] : !llvm.vec<8 x i16>, !llvm.vec<8 x i16>
%0 = "llvm_arm_neon.smull"(%arg0, %arg1) : (vector<8xi8>, vector<8xi8>) -> vector<8xi16>
%1 = llvm.shufflevector %0, %0 [3, 4, 5, 6] : vector<8xi16>, vector<8xi16>

// CHECK-NEXT: %[[V1:.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %[[V00]], <4 x i16> %[[V00]])
// CHECK-NEXT: %[[V11:.*]] = shufflevector <4 x i32> %[[V1]], <4 x i32> %[[V1]], <2 x i32> <i32 1, i32 2>
%2 = "llvm_arm_neon.smull"(%1, %1) : (!llvm.vec<4 x i16>, !llvm.vec<4 x i16>) -> !llvm.vec<4 x i32>
%3 = llvm.shufflevector %2, %2 [1, 2] : !llvm.vec<4 x i32>, !llvm.vec<4 x i32>
%2 = "llvm_arm_neon.smull"(%1, %1) : (vector<4xi16>, vector<4xi16>) -> vector<4xi32>
%3 = llvm.shufflevector %2, %2 [1, 2] : vector<4xi32>, vector<4xi32>

// The pattern below rebinds the V1 capture used above; it is not referenced again afterwards.
// CHECK-NEXT: %[[V1:.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %[[V11]], <2 x i32> %[[V11]])
%4 = "llvm_arm_neon.smull"(%3, %3) : (!llvm.vec<2 x i32>, !llvm.vec<2 x i32>) -> !llvm.vec<2 x i64>
%4 = "llvm_arm_neon.smull"(%3, %3) : (vector<2xi32>, vector<2xi32>) -> vector<2xi64>

%5 = llvm.mlir.undef : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
%6 = llvm.insertvalue %0, %5[0] : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
%7 = llvm.insertvalue %2, %6[1] : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
%8 = llvm.insertvalue %4, %7[2] : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
%5 = llvm.mlir.undef : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>
%6 = llvm.insertvalue %0, %5[0] : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>
%7 = llvm.insertvalue %2, %6[1] : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>
%8 = llvm.insertvalue %4, %7[2] : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>

// CHECK: ret { <8 x i16>, <4 x i32>, <2 x i64> }
llvm.return %8 : !llvm.struct<(vec<8 x i16>, vec<4 x i32>, vec<2 x i64>)>
llvm.return %8 : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>
}
56 changes: 28 additions & 28 deletions mlir/test/Target/arm-sve.mlir
Original file line number Diff line number Diff line change
@@ -1,51 +1,51 @@
// RUN: mlir-opt -verify-diagnostics %s | mlir-opt | mlir-translate --arm-sve-mlir-to-llvmir | FileCheck %s

// Checks that llvm_arm_sve.sdot on scalable vectors becomes a call to
// @llvm.aarch64.sve.sdot.nxv4i32. The i32 vector %arg2 is passed first,
// followed by the two i8 vectors.
// (Diff view: the replacement lines only change spacing, `? x 16` -> `?x16`.)
// CHECK-LABEL: define <vscale x 4 x i32> @arm_sve_sdot
llvm.func @arm_sve_sdot(%arg0: !llvm.vec<? x 16 x i8>,
%arg1: !llvm.vec<? x 16 x i8>,
%arg2: !llvm.vec<? x 4 x i32>)
-> !llvm.vec<? x 4 x i32> {
llvm.func @arm_sve_sdot(%arg0: !llvm.vec<?x16 x i8>,
%arg1: !llvm.vec<?x16 x i8>,
%arg2: !llvm.vec<?x4 x i32>)
-> !llvm.vec<?x4 x i32> {
// CHECK: call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4
%0 = "llvm_arm_sve.sdot"(%arg2, %arg0, %arg1) :
(!llvm.vec<? x 4 x i32>, !llvm.vec<? x 16 x i8>, !llvm.vec<? x 16 x i8>)
-> !llvm.vec<? x 4 x i32>
llvm.return %0 : !llvm.vec<? x 4 x i32>
(!llvm.vec<?x4 x i32>, !llvm.vec<?x16 x i8>, !llvm.vec<?x16 x i8>)
-> !llvm.vec<?x4 x i32>
llvm.return %0 : !llvm.vec<?x4 x i32>
}

// Checks that llvm_arm_sve.smmla on scalable vectors becomes a call to
// @llvm.aarch64.sve.smmla.nxv4i32. The i32 vector %arg2 is passed first,
// followed by the two i8 vectors.
// (Diff view: the replacement lines only change spacing, `? x 16` -> `?x16`.)
// CHECK-LABEL: define <vscale x 4 x i32> @arm_sve_smmla
llvm.func @arm_sve_smmla(%arg0: !llvm.vec<? x 16 x i8>,
%arg1: !llvm.vec<? x 16 x i8>,
%arg2: !llvm.vec<? x 4 x i32>)
-> !llvm.vec<? x 4 x i32> {
llvm.func @arm_sve_smmla(%arg0: !llvm.vec<?x16 x i8>,
%arg1: !llvm.vec<?x16 x i8>,
%arg2: !llvm.vec<?x4 x i32>)
-> !llvm.vec<?x4 x i32> {
// CHECK: call <vscale x 4 x i32> @llvm.aarch64.sve.smmla.nxv4i32(<vscale x 4
%0 = "llvm_arm_sve.smmla"(%arg2, %arg0, %arg1) :
(!llvm.vec<? x 4 x i32>, !llvm.vec<? x 16 x i8>, !llvm.vec<? x 16 x i8>)
-> !llvm.vec<? x 4 x i32>
llvm.return %0 : !llvm.vec<? x 4 x i32>
(!llvm.vec<?x4 x i32>, !llvm.vec<?x16 x i8>, !llvm.vec<?x16 x i8>)
-> !llvm.vec<?x4 x i32>
llvm.return %0 : !llvm.vec<?x4 x i32>
}

// Checks that llvm_arm_sve.udot on scalable vectors becomes a call to
// @llvm.aarch64.sve.udot.nxv4i32. The i32 vector %arg2 is passed first,
// followed by the two i8 vectors.
// (Diff view: the replacement lines only change spacing, `? x 16` -> `?x16`.)
// CHECK-LABEL: define <vscale x 4 x i32> @arm_sve_udot
llvm.func @arm_sve_udot(%arg0: !llvm.vec<? x 16 x i8>,
%arg1: !llvm.vec<? x 16 x i8>,
%arg2: !llvm.vec<? x 4 x i32>)
-> !llvm.vec<? x 4 x i32> {
llvm.func @arm_sve_udot(%arg0: !llvm.vec<?x16 x i8>,
%arg1: !llvm.vec<?x16 x i8>,
%arg2: !llvm.vec<?x4 x i32>)
-> !llvm.vec<?x4 x i32> {
// CHECK: call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4
%0 = "llvm_arm_sve.udot"(%arg2, %arg0, %arg1) :
(!llvm.vec<? x 4 x i32>, !llvm.vec<? x 16 x i8>, !llvm.vec<? x 16 x i8>)
-> !llvm.vec<? x 4 x i32>
llvm.return %0 : !llvm.vec<? x 4 x i32>
(!llvm.vec<?x4 x i32>, !llvm.vec<?x16 x i8>, !llvm.vec<?x16 x i8>)
-> !llvm.vec<?x4 x i32>
llvm.return %0 : !llvm.vec<?x4 x i32>
}

// Checks that llvm_arm_sve.ummla on scalable vectors becomes a call to
// @llvm.aarch64.sve.ummla.nxv4i32. The i32 vector %arg2 is passed first,
// followed by the two i8 vectors.
// (Diff view: the replacement lines only change spacing, `? x 16` -> `?x16`.)
// CHECK-LABEL: define <vscale x 4 x i32> @arm_sve_ummla
llvm.func @arm_sve_ummla(%arg0: !llvm.vec<? x 16 x i8>,
%arg1: !llvm.vec<? x 16 x i8>,
%arg2: !llvm.vec<? x 4 x i32>)
-> !llvm.vec<? x 4 x i32> {
llvm.func @arm_sve_ummla(%arg0: !llvm.vec<?x16 x i8>,
%arg1: !llvm.vec<?x16 x i8>,
%arg2: !llvm.vec<?x4 x i32>)
-> !llvm.vec<?x4 x i32> {
// CHECK: call <vscale x 4 x i32> @llvm.aarch64.sve.ummla.nxv4i32(<vscale x 4
%0 = "llvm_arm_sve.ummla"(%arg2, %arg0, %arg1) :
(!llvm.vec<? x 4 x i32>, !llvm.vec<? x 16 x i8>, !llvm.vec<? x 16 x i8>)
-> !llvm.vec<? x 4 x i32>
llvm.return %0 : !llvm.vec<? x 4 x i32>
(!llvm.vec<?x4 x i32>, !llvm.vec<?x16 x i8>, !llvm.vec<?x16 x i8>)
-> !llvm.vec<?x4 x i32>
llvm.return %0 : !llvm.vec<?x4 x i32>
}

// CHECK-LABEL: define i64 @get_vector_scale()
Expand Down
20 changes: 10 additions & 10 deletions mlir/test/Target/avx512.mlir
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
// RUN: mlir-opt -verify-diagnostics %s | mlir-opt | mlir-translate --avx512-mlir-to-llvmir | FileCheck %s

// Checks that the llvm_avx512 mask.rndscale.ps.512 and mask.scalef.ps.512 ops
// on 16 x f32 vectors become calls to the matching @llvm.x86.avx512.* intrinsics.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
// NOTE(review): the replacement lines keep spaces inside `vector<16 x f32>`,
// while single-digit sizes elsewhere in this diff use the compact form such
// as `vector<8xf64>` -- presumably a leftover of the mechanical rewrite;
// confirm whether it should be normalized.
// CHECK-LABEL: define <16 x float> @LLVM_x86_avx512_mask_ps_512
llvm.func @LLVM_x86_avx512_mask_ps_512(%a: !llvm.vec<16 x f32>,
llvm.func @LLVM_x86_avx512_mask_ps_512(%a: vector<16 x f32>,
%b: i32,
%c: i16)
-> (!llvm.vec<16 x f32>)
-> (vector<16 x f32>)
{
// CHECK: call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>
%0 = "llvm_avx512.mask.rndscale.ps.512"(%a, %b, %a, %c, %b) :
(!llvm.vec<16 x f32>, i32, !llvm.vec<16 x f32>, i16, i32) -> !llvm.vec<16 x f32>
(vector<16 x f32>, i32, vector<16 x f32>, i16, i32) -> vector<16 x f32>
// CHECK: call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>
%1 = "llvm_avx512.mask.scalef.ps.512"(%a, %a, %a, %c, %b) :
(!llvm.vec<16 x f32>, !llvm.vec<16 x f32>, !llvm.vec<16 x f32>, i16, i32) -> !llvm.vec<16 x f32>
llvm.return %1: !llvm.vec<16 x f32>
(vector<16 x f32>, vector<16 x f32>, vector<16 x f32>, i16, i32) -> vector<16 x f32>
llvm.return %1: vector<16 x f32>
}

// Checks that the llvm_avx512 mask.rndscale.pd.512 and mask.scalef.pd.512 ops
// on 8 x f64 vectors become calls to the matching @llvm.x86.avx512.* intrinsics.
// The mask operand %c is i8 here.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
// CHECK-LABEL: define <8 x double> @LLVM_x86_avx512_mask_pd_512
llvm.func @LLVM_x86_avx512_mask_pd_512(%a: !llvm.vec<8 x f64>,
llvm.func @LLVM_x86_avx512_mask_pd_512(%a: vector<8xf64>,
%b: i32,
%c: i8)
-> (!llvm.vec<8 x f64>)
-> (vector<8xf64>)
{
// CHECK: call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>
%0 = "llvm_avx512.mask.rndscale.pd.512"(%a, %b, %a, %c, %b) :
(!llvm.vec<8 x f64>, i32, !llvm.vec<8 x f64>, i8, i32) -> !llvm.vec<8 x f64>
(vector<8xf64>, i32, vector<8xf64>, i8, i32) -> vector<8xf64>
// CHECK: call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>
%1 = "llvm_avx512.mask.scalef.pd.512"(%a, %a, %a, %c, %b) :
(!llvm.vec<8 x f64>, !llvm.vec<8 x f64>, !llvm.vec<8 x f64>, i8, i32) -> !llvm.vec<8 x f64>
llvm.return %1: !llvm.vec<8 x f64>
(vector<8xf64>, vector<8xf64>, vector<8xf64>, i8, i32) -> vector<8xf64>
llvm.return %1: vector<8xf64>
}
6 changes: 3 additions & 3 deletions mlir/test/Target/import.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; LLVM IR globals and the MLIR globals they are expected to import as.
; The external <8 x i32> global @g5 is the entry touched by this diff: its old
; pattern (`!llvm.vec`) is followed by the replacement (`vector<8xi32>`).
; No pattern for @g4 is visible in this hunk.
; CHECK: llvm.mlir.global internal @g3("string")
@g3 = internal global [6 x i8] c"string"

; CHECK: llvm.mlir.global external @g5() : !llvm.vec<8 x i32>
; CHECK: llvm.mlir.global external @g5() : vector<8xi32>
@g5 = external global <8 x i32>

@g4 = external global i32, align 8
Expand Down Expand Up @@ -53,15 +53,15 @@
; Sequential constants.
;

; Vector- and array-typed constant globals and the MLIR globals they are
; expected to import as. Per the patterns, vector initializers carry a
; `vector<...>` attribute type and array initializers a `tensor<...>` one.
; (Diff view: for the two entries involving vectors, the old pattern is
; followed by its replacement.)
; CHECK: llvm.mlir.global internal constant @vector_constant(dense<[1, 2]> : vector<2xi32>) : !llvm.vec<2 x i32>
; CHECK: llvm.mlir.global internal constant @vector_constant(dense<[1, 2]> : vector<2xi32>) : vector<2xi32>
@vector_constant = internal constant <2 x i32> <i32 1, i32 2>
; CHECK: llvm.mlir.global internal constant @array_constant(dense<[1.000000e+00, 2.000000e+00]> : tensor<2xf32>) : !llvm.array<2 x f32>
@array_constant = internal constant [2 x float] [float 1., float 2.]
; CHECK: llvm.mlir.global internal constant @nested_array_constant(dense<[{{\[}}1, 2], [3, 4]]> : tensor<2x2xi32>) : !llvm.array<2 x array<2 x i32>>
@nested_array_constant = internal constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]]
; CHECK: llvm.mlir.global internal constant @nested_array_constant3(dense<[{{\[}}[1, 2], [3, 4]]]> : tensor<1x2x2xi32>) : !llvm.array<1 x array<2 x array<2 x i32>>>
@nested_array_constant3 = internal constant [1 x [2 x [2 x i32]]] [[2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]]]
; CHECK: llvm.mlir.global internal constant @nested_array_vector(dense<[{{\[}}[1, 2], [3, 4]]]> : vector<1x2x2xi32>) : !llvm.array<1 x array<2 x vec<2 x i32>>>
; CHECK: llvm.mlir.global internal constant @nested_array_vector(dense<[{{\[}}[1, 2], [3, 4]]]> : vector<1x2x2xi32>) : !llvm.array<1 x array<2 x vector<2xi32>>>
@nested_array_vector = internal constant [1 x [2 x <2 x i32>]] [[2 x <2 x i32>] [<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>]]

;
Expand Down
172 changes: 86 additions & 86 deletions mlir/test/Target/llvmir-intrinsics.mlir

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions mlir/test/Target/llvmir-types.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,15 @@ llvm.func @return_ppi8_42_9() -> !llvm.ptr<ptr<i8, 42>, 9>
//

// Function declarations returning vector types, each paired with the LLVM IR
// `declare` line it is expected to produce. In the replacement lines the
// fixed-size i32 and f32 vectors use the builtin `vector` type, while the
// scalable vectors and the vector of pointers stay in `!llvm.vec`.
// (Diff view: each old declaration is followed by its replacement.)
// CHECK: declare <4 x i32> @return_v4_i32()
llvm.func @return_v4_i32() -> !llvm.vec<4 x i32>
llvm.func @return_v4_i32() -> vector<4xi32>
// CHECK: declare <4 x float> @return_v4_float()
llvm.func @return_v4_float() -> !llvm.vec<4 x f32>
llvm.func @return_v4_float() -> vector<4xf32>
// CHECK: declare <vscale x 4 x i32> @return_vs_4_i32()
llvm.func @return_vs_4_i32() -> !llvm.vec<? x 4 x i32>
llvm.func @return_vs_4_i32() -> !llvm.vec<?x4 x i32>
// CHECK: declare <vscale x 8 x half> @return_vs_8_half()
llvm.func @return_vs_8_half() -> !llvm.vec<? x 8 x f16>
llvm.func @return_vs_8_half() -> !llvm.vec<?x8 x f16>
// CHECK: declare <4 x i8*> @return_v_4_pi8()
llvm.func @return_v_4_pi8() -> !llvm.vec<4 x ptr<i8>>
llvm.func @return_v_4_pi8() -> !llvm.vec<4xptr<i8>>

//
// Arrays.
Expand Down
84 changes: 42 additions & 42 deletions mlir/test/Target/llvmir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -782,66 +782,66 @@ llvm.func @multireturn_caller() {
}

// Translates a run of element-wise arithmetic, bitwise and shift ops on
// 4-element vectors. The patterns pin the exact SSA numbers (%4 through %17)
// and operand numbers of the produced LLVM IR, so adding or reordering ops
// changes the expected numbering.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
// CHECK-LABEL: define <4 x float> @vector_ops(<4 x float> {{%.*}}, <4 x i1> {{%.*}}, <4 x i64> {{%.*}})
llvm.func @vector_ops(%arg0: !llvm.vec<4 x f32>, %arg1: !llvm.vec<4 x i1>, %arg2: !llvm.vec<4 x i64>) -> !llvm.vec<4 x f32> {
%0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : !llvm.vec<4 x f32>
llvm.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>) -> vector<4xf32> {
%0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : vector<4xf32>
// CHECK-NEXT: %4 = fadd <4 x float> %0, <float 4.200000e+01, float 4.200000e+01, float 4.200000e+01, float 4.200000e+01>
%1 = llvm.fadd %arg0, %0 : !llvm.vec<4 x f32>
%1 = llvm.fadd %arg0, %0 : vector<4xf32>
// CHECK-NEXT: %5 = select <4 x i1> %1, <4 x float> %4, <4 x float> %0
%2 = llvm.select %arg1, %1, %arg0 : !llvm.vec<4 x i1>, !llvm.vec<4 x f32>
%2 = llvm.select %arg1, %1, %arg0 : vector<4xi1>, vector<4xf32>
// CHECK-NEXT: %6 = sdiv <4 x i64> %2, %2
%3 = llvm.sdiv %arg2, %arg2 : !llvm.vec<4 x i64>
%3 = llvm.sdiv %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: %7 = udiv <4 x i64> %2, %2
%4 = llvm.udiv %arg2, %arg2 : !llvm.vec<4 x i64>
%4 = llvm.udiv %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: %8 = srem <4 x i64> %2, %2
%5 = llvm.srem %arg2, %arg2 : !llvm.vec<4 x i64>
%5 = llvm.srem %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: %9 = urem <4 x i64> %2, %2
%6 = llvm.urem %arg2, %arg2 : !llvm.vec<4 x i64>
%6 = llvm.urem %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: %10 = fdiv <4 x float> %0, <float 4.200000e+01, float 4.200000e+01, float 4.200000e+01, float 4.200000e+01>
%7 = llvm.fdiv %arg0, %0 : !llvm.vec<4 x f32>
%7 = llvm.fdiv %arg0, %0 : vector<4xf32>
// CHECK-NEXT: %11 = frem <4 x float> %0, <float 4.200000e+01, float 4.200000e+01, float 4.200000e+01, float 4.200000e+01>
%8 = llvm.frem %arg0, %0 : !llvm.vec<4 x f32>
%8 = llvm.frem %arg0, %0 : vector<4xf32>
// CHECK-NEXT: %12 = and <4 x i64> %2, %2
%9 = llvm.and %arg2, %arg2 : !llvm.vec<4 x i64>
%9 = llvm.and %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: %13 = or <4 x i64> %2, %2
%10 = llvm.or %arg2, %arg2 : !llvm.vec<4 x i64>
%10 = llvm.or %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: %14 = xor <4 x i64> %2, %2
%11 = llvm.xor %arg2, %arg2 : !llvm.vec<4 x i64>
%11 = llvm.xor %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: %15 = shl <4 x i64> %2, %2
%12 = llvm.shl %arg2, %arg2 : !llvm.vec<4 x i64>
%12 = llvm.shl %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: %16 = lshr <4 x i64> %2, %2
%13 = llvm.lshr %arg2, %arg2 : !llvm.vec<4 x i64>
%13 = llvm.lshr %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: %17 = ashr <4 x i64> %2, %2
%14 = llvm.ashr %arg2, %arg2 : !llvm.vec<4 x i64>
%14 = llvm.ashr %arg2, %arg2 : vector<4xi64>
// CHECK-NEXT: ret <4 x float> %4
llvm.return %1 : !llvm.vec<4 x f32>
llvm.return %1 : vector<4xf32>
}

// A zero splat constant of a 4 x f32 vector is expected to be emitted as
// `zeroinitializer` in the return.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
// CHECK-LABEL: @vector_splat_1d
llvm.func @vector_splat_1d() -> !llvm.vec<4 x f32> {
llvm.func @vector_splat_1d() -> vector<4xf32> {
// CHECK: ret <4 x float> zeroinitializer
%0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4xf32>) : !llvm.vec<4 x f32>
llvm.return %0 : !llvm.vec<4 x f32>
%0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4xf32>) : vector<4xf32>
llvm.return %0 : vector<4xf32>
}

// A zero splat with a 2-D `vector<4x16xf32>` attribute, typed as an LLVM array
// of 16-element vectors, is expected to be emitted as `zeroinitializer` of
// type [4 x <16 x float>].
// (Diff view: each old `vec<16 x f32>` line is followed by its `vector` replacement.)
// CHECK-LABEL: @vector_splat_2d
llvm.func @vector_splat_2d() -> !llvm.array<4 x vec<16 x f32>> {
llvm.func @vector_splat_2d() -> !llvm.array<4 x vector<16 x f32>> {
// CHECK: ret [4 x <16 x float>] zeroinitializer
%0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16xf32>) : !llvm.array<4 x vec<16 x f32>>
llvm.return %0 : !llvm.array<4 x vec<16 x f32>>
%0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16xf32>) : !llvm.array<4 x vector<16 x f32>>
llvm.return %0 : !llvm.array<4 x vector<16 x f32>>
}

// A zero splat with a 3-D `vector<4x16x4xf32>` attribute, typed as nested LLVM
// arrays of 4-element vectors, is expected to be emitted as `zeroinitializer`
// of type [4 x [16 x <4 x float>]].
// (Diff view: each old `vec<4 x f32>` line is followed by its `vector` replacement.)
// CHECK-LABEL: @vector_splat_3d
llvm.func @vector_splat_3d() -> !llvm.array<4 x array<16 x vec<4 x f32>>> {
llvm.func @vector_splat_3d() -> !llvm.array<4 x array<16 x vector<4 x f32>>> {
// CHECK: ret [4 x [16 x <4 x float>]] zeroinitializer
%0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16x4xf32>) : !llvm.array<4 x array<16 x vec<4 x f32>>>
llvm.return %0 : !llvm.array<4 x array<16 x vec<4 x f32>>>
%0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16x4xf32>) : !llvm.array<4 x array<16 x vector<4 x f32>>>
llvm.return %0 : !llvm.array<4 x array<16 x vector<4 x f32>>>
}

// A non-zero splat (1.0) of a 4 x f32 vector is expected to be emitted with
// all four elements spelled out rather than as `zeroinitializer`.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
// CHECK-LABEL: @vector_splat_nonzero
llvm.func @vector_splat_nonzero() -> !llvm.vec<4 x f32> {
llvm.func @vector_splat_nonzero() -> vector<4xf32> {
// CHECK: ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
%0 = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm.vec<4 x f32>
llvm.return %0 : !llvm.vec<4 x f32>
%0 = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32>
llvm.return %0 : vector<4xf32>
}

// CHECK-LABEL: @ops
Expand Down Expand Up @@ -1019,22 +1019,22 @@ llvm.func @fcmp(%arg0: f32, %arg1: f32) {
}

// Translates extractelement, insertelement and shufflevector on 4 x f32
// vectors with an i32 index. The three patterns must match consecutive output
// lines; the five-entry shuffle mask shows up as a <5 x i32> mask.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
// CHECK-LABEL: @vect
llvm.func @vect(%arg0: !llvm.vec<4 x f32>, %arg1: i32, %arg2: f32) {
llvm.func @vect(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32) {
// CHECK-NEXT: extractelement <4 x float> {{.*}}, i32
// CHECK-NEXT: insertelement <4 x float> {{.*}}, float %2, i32
// CHECK-NEXT: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <5 x i32> <i32 0, i32 0, i32 0, i32 0, i32 7>
%0 = llvm.extractelement %arg0[%arg1 : i32] : !llvm.vec<4 x f32>
%1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : !llvm.vec<4 x f32>
%2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm.vec<4 x f32>, !llvm.vec<4 x f32>
%0 = llvm.extractelement %arg0[%arg1 : i32] : vector<4xf32>
%1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : vector<4xf32>
%2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : vector<4xf32>, vector<4xf32>
llvm.return
}

// Same extractelement/insertelement check as for an i32 index, but with an
// i64 index operand; the patterns require the index to be emitted as i64.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
// CHECK-LABEL: @vect_i64idx
llvm.func @vect_i64idx(%arg0: !llvm.vec<4 x f32>, %arg1: i64, %arg2: f32) {
llvm.func @vect_i64idx(%arg0: vector<4xf32>, %arg1: i64, %arg2: f32) {
// CHECK-NEXT: extractelement <4 x float> {{.*}}, i64
// CHECK-NEXT: insertelement <4 x float> {{.*}}, float %2, i64
%0 = llvm.extractelement %arg0[%arg1 : i64] : !llvm.vec<4 x f32>
%1 = llvm.insertelement %arg2, %arg0[%arg1 : i64] : !llvm.vec<4 x f32>
%0 = llvm.extractelement %arg0[%arg1 : i64] : vector<4xf32>
%1 = llvm.insertelement %arg2, %arg0[%arg1 : i64] : vector<4xf32>
llvm.return
}

Expand All @@ -1050,10 +1050,10 @@ llvm.func @alloca(%size : i64) {
}

// A sparse elements attribute with one non-zero entry (42.0 at index 0) is
// expected to be emitted as a full 4 x float constant with the other elements
// zero. The `{{0*}}` parts of the pattern accept any number of zero digits.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
// CHECK-LABEL: @constants
llvm.func @constants() -> !llvm.vec<4 x f32> {
llvm.func @constants() -> vector<4xf32> {
// CHECK: ret <4 x float> <float 4.2{{0*}}e+01, float 0.{{0*}}e+00, float 0.{{0*}}e+00, float 0.{{0*}}e+00>
%0 = llvm.mlir.constant(sparse<[[0]], [4.2e+01]> : vector<4xf32>) : !llvm.vec<4 x f32>
llvm.return %0 : !llvm.vec<4 x f32>
%0 = llvm.mlir.constant(sparse<[[0]], [4.2e+01]> : vector<4xf32>) : vector<4xf32>
llvm.return %0 : vector<4xf32>
}

// CHECK-LABEL: @fp_casts
Expand Down Expand Up @@ -1088,12 +1088,12 @@ llvm.func @null() -> !llvm.ptr<i32> {

// Check that dense elements attributes are exported properly in constants.
// Here a 2x2x2 dense attribute is typed as nested LLVM arrays of 2 x i32
// vectors; the expected output lists the innermost vectors element by element.
// (Diff view: each old `vec<2 x i32>` line is followed by its `vector` replacement.)
// CHECK-LABEL: @elements_constant_3d_vector
llvm.func @elements_constant_3d_vector() -> !llvm.array<2 x array<2 x vec<2 x i32>>> {
llvm.func @elements_constant_3d_vector() -> !llvm.array<2 x array<2 x vector<2 x i32>>> {
// CHECK: ret [2 x [2 x <2 x i32>]]
// CHECK-SAME: {{\[}}[2 x <2 x i32>] [<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>],
// CHECK-SAME: [2 x <2 x i32>] [<2 x i32> <i32 42, i32 43>, <2 x i32> <i32 44, i32 45>]]
%0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : vector<2x2x2xi32>) : !llvm.array<2 x array<2 x vec<2 x i32>>>
llvm.return %0 : !llvm.array<2 x array<2 x vec<2 x i32>>>
%0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : vector<2x2x2xi32>) : !llvm.array<2 x array<2 x vector<2 x i32>>>
llvm.return %0 : !llvm.array<2 x array<2 x vector<2 x i32>>>
}

// CHECK-LABEL: @elements_constant_3d_array
Expand Down
6 changes: 3 additions & 3 deletions mlir/test/Target/nvvmir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@ llvm.func @nvvm_vote(%0 : i32, %1 : i1) -> i32 {
llvm.return %3 : i32
}

// Checks that nvvm.mma.sync with four 2 x f16 vector operands and eight f32
// operands (alayout "row", blayout "col") becomes a call to
// @llvm.nvvm.mma.m8n8k4.row.col.f32.f32 returning a struct of eight floats.
// (Diff view: each old `!llvm.vec` line is followed by its `vector` replacement.)
llvm.func @nvvm_mma(%a0 : !llvm.vec<2 x f16>, %a1 : !llvm.vec<2 x f16>,
%b0 : !llvm.vec<2 x f16>, %b1 : !llvm.vec<2 x f16>,
llvm.func @nvvm_mma(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
%b0 : vector<2xf16>, %b1 : vector<2xf16>,
%c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32,
%c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) {
// CHECK: call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, !llvm.vec<2 x f16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
%0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
}

Expand Down
110 changes: 55 additions & 55 deletions mlir/test/Target/rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -43,133 +43,133 @@ llvm.func @rocdl.barrier() {
}

llvm.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32,
%arg2 : !llvm.vec<32 x f32>, %arg3 : i32,
%arg4 : !llvm.vec<16 x f32>, %arg5 : !llvm.vec<4 x f32>,
%arg6 : !llvm.vec<4 x f16>, %arg7 : !llvm.vec<32 x i32>,
%arg8 : !llvm.vec<16 x i32>, %arg9 : !llvm.vec<4 x i32>,
%arg10 : !llvm.vec<2 x i16>) -> !llvm.vec<32 x f32> {
%arg2 : vector<32 x f32>, %arg3 : i32,
%arg4 : vector<16 x f32>, %arg5 : vector<4xf32>,
%arg6 : vector<4xf16>, %arg7 : vector<32 x i32>,
%arg8 : vector<16 x i32>, %arg9 : vector<4xi32>,
%arg10 : vector<2xi16>) -> vector<32 x f32> {
// CHECK-LABEL: rocdl.xdlops
// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %{{.*}}, float %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<32 x f32>,
i32, i32, i32) -> !llvm.vec<32 x f32>
(f32, f32, vector<32 x f32>,
i32, i32, i32) -> vector<32 x f32>

// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(f32, f32, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>

// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(f32, f32, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(f32, f32, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 :
(f32, f32, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(f32, f32, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>

// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<32 x f32>,
i32, i32, i32) -> !llvm.vec<32 x f32>
(vector<4xf16>, vector<4xf16>, vector<32 x f32>,
i32, i32, i32) -> vector<32 x f32>

// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(vector<4xf16>, vector<4xf16>, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>

// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(vector<4xf16>, vector<4xf16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(vector<4xf16>, vector<4xf16>, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>

// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 :
(!llvm.vec<4 x f16>, !llvm.vec<4 x f16>, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(vector<4xf16>, vector<4xf16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %{{.*}}, i32 %{{.*}}, <32 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<32 x i32>,
i32, i32, i32) -> !llvm.vec<32 x i32>
(i32, i32, vector<32 x i32>,
i32, i32, i32) -> vector<32 x i32>

// CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<16 x i32>,
i32, i32, i32) -> !llvm.vec<16 x i32>
(i32, i32, vector<16 x i32>,
i32, i32, i32) -> vector<16 x i32>

// CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<4 x i32>,
i32, i32, i32) -> !llvm.vec<4 x i32>
(i32, i32, vector<4xi32>,
i32, i32, i32) -> vector<4xi32>

// CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<16 x i32>,
i32, i32, i32) -> !llvm.vec<16 x i32>
(i32, i32, vector<16 x i32>,
i32, i32, i32) -> vector<16 x i32>

// CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 :
(i32, i32, !llvm.vec<4 x i32>,
i32, i32, i32) -> !llvm.vec<4 x i32>
(i32, i32, vector<4xi32>,
i32, i32, i32) -> vector<4xi32>

// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<32 x f32>,
i32, i32, i32) -> !llvm.vec<32 x f32>
(vector<2xi16>, vector<2xi16>, vector<32 x f32>,
i32, i32, i32) -> vector<32 x f32>

// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(vector<2xi16>, vector<2xi16>, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>

// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(vector<2xi16>, vector<2xi16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x f32>,
i32, i32, i32) -> !llvm.vec<16 x f32>
(vector<2xi16>, vector<2xi16>, vector<16 x f32>,
i32, i32, i32) -> vector<16 x f32>

// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
%r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 :
(!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x f32>,
i32, i32, i32) -> !llvm.vec<4 x f32>
(vector<2xi16>, vector<2xi16>, vector<4xf32>,
i32, i32, i32) -> vector<4xf32>

llvm.return %r0 : !llvm.vec<32 x f32>
llvm.return %r0 : vector<32 x f32>
}

// Translation test for the ROCDL buffer ops: runs rocdl.buffer.load and
// rocdl.buffer.store over 1-, 2- and 4-element f32 vectors and matches the
// emitted LLVM IR against the llvm.amdgcn.buffer.{load,store}.v{1,2,4}f32
// intrinsic calls listed in the FileCheck directives below.
// NOTE(review): every line that mentions a vector type appears twice here,
// first spelled `!llvm.vec<N x T>` and then `vector<NxT>`. This looks like
// old/new line pairs left over from a diff view -- confirm which spelling is
// intended before using this text as a runnable test.
llvm.func @rocdl.mubuf(%rsrc : !llvm.vec<4 x i32>, %vindex : i32,
llvm.func @rocdl.mubuf(%rsrc : vector<4xi32>, %vindex : i32,
%offset : i32, %glc : i1,
%slc : i1, %vdata1 : !llvm.vec<1 x f32>,
%vdata2 : !llvm.vec<2 x f32>, %vdata4 : !llvm.vec<4 x f32>) {
%slc : i1, %vdata1 : vector<1xf32>,
%vdata2 : vector<2xf32>, %vdata4 : vector<4xf32>) {
// CHECK-LABEL: rocdl.mubuf
// Loads: operands are (%rsrc, %vindex, %offset, %glc, %slc); the trailing type
// is the result vector, which is reflected in the .vNf32 intrinsic suffix.
// CHECK: call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
%r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x f32>
%r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32>
// CHECK: call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
%r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x f32>
%r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32>
// CHECK: call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
%r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x f32>
%r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32>

// Stores: the data vector comes first, followed by the same
// (%rsrc, %vindex, %offset, %glc, %slc) operands; the expected call returns void.
// CHECK: call void @llvm.amdgcn.buffer.store.v1f32(<1 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x f32>
rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : vector<1xf32>
// CHECK: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x f32>
rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : vector<2xf32>
// CHECK: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x f32>
rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : vector<4xf32>

llvm.return
}
Expand Down