diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 51396947fad4e..fa510bbf499fd 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -110,6 +110,7 @@ def NVVM_Dialect : Dialect { }]; let useDefaultAttributePrinterParser = 1; + let useStrictPropertiesInAssemblyFormat = 1; } //===----------------------------------------------------------------------===// @@ -308,7 +309,7 @@ class NVVM_F32UnaryApproxOp traits = []> : let arguments = (ins F32:$src, DefaultValuedAttr:$ftz); let results = (outs F32:$res); - let assemblyFormat = "$src attr-dict `:` type($src)"; + let assemblyFormat = "$src (`,` `ftz` `=` $ftz^)? attr-dict `:` type($src)"; } @@ -610,7 +611,8 @@ def NVVM_ReduxOp : $res = createIntrinsicCall(builder, intId, {$val, $mask_and_clamp}); }]; let assemblyFormat = [{ - $kind $val `,` $mask_and_clamp attr-dict `:` type($val) `->` type($res) + $kind $val `,` $mask_and_clamp (`abs` `=` $abs^)? + (`nan` `=` $nan^)? attr-dict `:` type($val) `->` type($res) }]; } @@ -755,7 +757,7 @@ def NVVM_MBarrierExpectTxOp : NVVM_VoidIntrinsicOp<"mbarrier.expect_tx"> { I32:$txcount, DefaultValuedAttr:$scope); - let assemblyFormat = "$addr `,` $txcount attr-dict `:` type(operands)"; + let assemblyFormat = "$addr `,` $txcount prop-dict attr-dict `:` type(operands)"; let hasVerifier = 1; } @@ -778,7 +780,7 @@ def NVVM_MBarrierCompleteTxOp : NVVM_VoidIntrinsicOp<"mbarrier.complete_tx"> { I32:$txcount, DefaultValuedAttr:$scope); - let assemblyFormat = "$addr `,` $txcount attr-dict `:` type(operands)"; + let assemblyFormat = "$addr `,` $txcount prop-dict attr-dict `:` type(operands)"; let hasVerifier = 1; } @@ -827,7 +829,7 @@ def NVVM_MBarrierArriveOp : NVVM_SingleResultIntrinsicOp<"mbarrier.arrive", DefaultValuedAttr:$scope, DefaultValuedAttr:$relaxed); - let assemblyFormat = "$addr (`,` $count^)? attr-dict `:` type($addr) (`->` type($res)^)?"; + let assemblyFormat = "$addr (`,` $count^)? prop-dict attr-dict `:` type($addr) (`->` type($res)^)?"; let hasVerifier = 1; } @@ -853,7 +855,7 @@ def NVVM_MBarrierArriveDropOp : NVVM_SingleResultIntrinsicOp<"mbarrier.arrive_dr DefaultValuedAttr:$scope, DefaultValuedAttr:$relaxed); - let assemblyFormat = "$addr (`,` $count^)? attr-dict `:` type($addr) (`->` type($res)^)?"; + let assemblyFormat = "$addr (`,` $count^)? prop-dict attr-dict `:` type($addr) (`->` type($res)^)?"; let hasVerifier = 1; } @@ -951,7 +953,7 @@ def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_t DefaultValuedAttr:$relaxed, PtxPredicate:$predicate); - let assemblyFormat = "$addr `,` $txcount (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands) (`->` type($res)^)?"; + let assemblyFormat = "$addr `,` $txcount (`,` `predicate` `=` $predicate^)? prop-dict attr-dict `:` type(operands) (`->` type($res)^)?"; let hasVerifier = 1; let extraClassDeclaration = [{ @@ -994,7 +996,7 @@ def NVVM_MBarrierArriveDropExpectTxOp : NVVM_SingleResultIntrinsicOp<"mbarrier.a DefaultValuedAttr:$scope, DefaultValuedAttr:$relaxed); - let assemblyFormat = "$addr `,` $txcount attr-dict `:` type(operands) (`->` type($res)^)?"; + let assemblyFormat = "$addr `,` $txcount prop-dict attr-dict `:` type(operands) (`->` type($res)^)?"; let hasVerifier = 1; } @@ -1118,7 +1120,7 @@ def NVVM_MBarrierTestWaitOp : NVVM_SingleResultIntrinsicOp<"mbarrier.test.wait"> DefaultValuedAttr:$scope, DefaultValuedAttr:$relaxed); - let assemblyFormat = "$addr `,` $stateOrPhase attr-dict `:` type(operands) `->` type($res)"; + let assemblyFormat = "$addr `,` $stateOrPhase prop-dict attr-dict `:` type(operands) `->` type($res)"; let hasVerifier = 1; } @@ -1147,7 +1149,7 @@ def NVVM_MBarrierTryWaitOp : NVVM_SingleResultIntrinsicOp<"mbarrier.try_wait"> { DefaultValuedAttr:$scope, DefaultValuedAttr:$relaxed); - let assemblyFormat = "$addr `,` $stateOrPhase (`,` $ticks^)? attr-dict `:` type(operands) `->` type($res)"; + let assemblyFormat = "$addr `,` $stateOrPhase (`,` $ticks^)? prop-dict attr-dict `:` type(operands) `->` type($res)"; let hasVerifier = 1; } @@ -1280,7 +1282,7 @@ def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> { else createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive); }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "(`aligned` $aligned^)? attr-dict"; } def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed", [NVVMRequiresSM<90>]> { @@ -1306,7 +1308,7 @@ def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed", [NVVMRequire else createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed); }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "(`aligned` $aligned^)? attr-dict"; } def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait", [NVVMRequiresSM<90>]> { @@ -1327,7 +1329,7 @@ def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait", [NVVMRequiresSM<90>]> { else createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait); }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "(`aligned` $aligned^)? attr-dict"; } //===----------------------------------------------------------------------===// @@ -1374,7 +1376,7 @@ def NVVM_FenceSyncRestrictOp : NVVM_Op<"fence.sync_restrict">, [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar) }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "$order attr-dict"; let llvmBuilder = [{ createIntrinsicCall(builder, getFenceSyncRestrictID($order)); }]; @@ -1428,7 +1430,7 @@ def NVVM_FenceProxyOp : NVVM_Op<"fence.proxy">, [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar) }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "$kind (`,` `space` `=` $space^)? attr-dict"; let llvmBuilder = [{ createIntrinsicCall(builder, getFenceProxyID($kind, $space)); @@ -1510,7 +1512,7 @@ def NVVM_FenceProxySyncRestrictOp : NVVM_Op<"fence.proxy.sync_restrict">, [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar) }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "$order (`from_proxy` `=` $fromProxy^)? (`to_proxy` `=` $toProxy^)? attr-dict"; let llvmBuilder = [{ createIntrinsicCall(builder, getFenceProxySyncRestrictID($order)); }]; @@ -1587,7 +1589,7 @@ def NVVM_ShflOp : intId, {$thread_mask, $val, $offset, $mask_and_clamp}); }]; let assemblyFormat = [{ - $kind $thread_mask `,` $val `,` $offset `,` $mask_and_clamp attr-dict + $kind $thread_mask `,` $val `,` $offset `,` $mask_and_clamp prop-dict attr-dict `:` type($val) `->` type($res) }]; let hasVerifier = 1; @@ -1634,7 +1636,7 @@ def NVVM_VoteSyncOp auto intId = getVoteSyncIntrinsicId($kind); $res = createIntrinsicCall(builder, intId, {$mask, $pred}); }]; - let assemblyFormat = "$kind $mask `,` $pred attr-dict `->` type($res)"; + let assemblyFormat = "$kind $mask `,` $pred prop-dict attr-dict `->` type($res)"; let hasVerifier = 1; } @@ -1668,7 +1670,7 @@ def NVVM_SyncWarpOp : [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar-warp-sync) }]; - let assemblyFormat = "$mask attr-dict `:` type($mask)"; + let assemblyFormat = "$mask prop-dict attr-dict `:` type($mask)"; } def NVVM_ElectSyncOp : NVVM_Op<"elect.sync"> @@ -1686,7 +1688,7 @@ def NVVM_ElectSyncOp : NVVM_Op<"elect.sync"> let arguments = (ins Optional:$membermask); let results = (outs I1:$pred); - let assemblyFormat = "($membermask^)? attr-dict `->` type(results)"; + let assemblyFormat = "($membermask^)? prop-dict attr-dict `->` type(results)"; string llvmBuilder = [{ auto *resultTuple = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_elect_sync, @@ -1805,7 +1807,7 @@ def NVVM_PermuteOp : NVVM_SingleResultIntrinsicOp<"prmt", [Pure]>, }]; let assemblyFormat = [{ - $mode $selector `,` $lo (`,` $hi^)? attr-dict `:` type($res) + $mode $selector `,` $lo (`,` $hi^)? prop-dict attr-dict `:` type($res) }]; let hasVerifier = 1; @@ -1839,7 +1841,7 @@ def NVVM_CpAsyncOp : NVVM_Op<"cp.async.shared.global">, I32Attr:$size, LoadCacheModifierAttr:$modifier, Optional:$cpSize)> { - let assemblyFormat = "$dst `,` $src `,` $size `,` `cache` `=` $modifier (`,` $cpSize^)? attr-dict `:` type(operands)"; + let assemblyFormat = "$dst `,` $src `,` $size `,` `cache` `=` $modifier (`,` $cpSize^)? prop-dict attr-dict `:` type(operands)"; let hasVerifier = 1; let extraClassDeclaration = [{ static llvm::Intrinsic::ID @@ -1858,7 +1860,7 @@ def NVVM_CpAsyncCommitGroupOp : NVVM_Op<"cp.async.commit.group"> { string llvmBuilder = [{ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_cp_async_commit_group); }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "prop-dict attr-dict"; } def NVVM_CpAsyncWaitGroupOp : NVVM_Op<"cp.async.wait.group">, @@ -1871,7 +1873,7 @@ def NVVM_CpAsyncWaitGroupOp : NVVM_Op<"cp.async.wait.group">, llvm::Type::getInt32Ty(moduleTranslation.getLLVMContext()), $n)); }]; - let assemblyFormat = "$n attr-dict"; + let assemblyFormat = "$n prop-dict attr-dict"; } def NVVM_CpAsyncMBarrierArriveOp : NVVM_VoidIntrinsicOp<"cp.async.mbarrier.arrive"> { @@ -1892,7 +1894,7 @@ def NVVM_CpAsyncMBarrierArriveOp : NVVM_VoidIntrinsicOp<"cp.async.mbarrier.arriv AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr, DefaultValuedAttr:$noinc); - let assemblyFormat = "$addr attr-dict `:` type(operands)"; + let assemblyFormat = "$addr prop-dict attr-dict `:` type(operands)"; } //===----------------------------------------------------------------------===// @@ -1971,7 +1973,7 @@ def NVVM_ConvertFloatToTF32Op : NVVM_Op<"convert.float.to.tf32"> { DefaultValuedAttr:$sat, DefaultValuedAttr:$relu); - let assemblyFormat = "$src attr-dict"; + let assemblyFormat = "$src prop-dict attr-dict"; let extraClassDeclaration = [{ static llvm::Intrinsic::ID getIntrinsicID(NVVM::FPRoundingMode, @@ -2003,7 +2005,7 @@ def NVVM_ConvertF32x2ToF4x2Op : NVVM_Op<"convert.f32x2.to.f4x2"> { let arguments = (ins F32:$a, F32:$b, DefaultValuedAttr:$relu, TypeAttr:$dstTy); - let assemblyFormat = "$a `,` $b attr-dict `:` type($dst) `(` $dstTy `)`"; + let assemblyFormat = "$a `,` $b prop-dict attr-dict `:` type($dst) `(` $dstTy `)`"; let hasVerifier = 1; let extraClassDeclaration = [{ @@ -2039,7 +2041,7 @@ class NVVM_ConvertFPx2ToF4x2Op TypeAttrOf:$dstTy); let assemblyFormat = - "$src attr-dict `:` type($src) `->` type($dst) `(` $dstTy `)`"; + "$src prop-dict attr-dict `:` type($src) `->` type($dst) `(` $dstTy `)`"; let extraClassDeclaration = [{ static NVVM::IDArgPair @@ -2081,7 +2083,7 @@ def NVVM_ConvertF32x2ToF6x2Op : NVVM_Op<"convert.f32x2.to.f6x2"> { F32:$b, DefaultValuedAttr:$relu, TypeAttr:$dstTy); - let assemblyFormat = "$a `,` $b attr-dict `:` type($dst) `(` $dstTy `)`"; + let assemblyFormat = "$a `,` $b prop-dict attr-dict `:` type($dst) `(` $dstTy `)`"; let hasVerifier = 1; let extraClassDeclaration = [{ @@ -2123,7 +2125,7 @@ class NVVM_ConvertFPx2ToF6x2Op VectorOfLengthAndType<[2], [!cast(srcType)]>:$src, DefaultValuedAttr:$relu, TypeAttrOf>:$dstTy); - let assemblyFormat = "$src attr-dict `:` type($src) `->` type($dst) `(` $dstTy `)`"; + let assemblyFormat = "$src prop-dict attr-dict `:` type($src) `->` type($dst) `(` $dstTy `)`"; let extraClassDeclaration = [{ static llvm::Intrinsic::ID getIntrinsicID(mlir::Type dstTy, bool hasRelu); }]; @@ -2169,7 +2171,7 @@ def NVVM_ConvertF32x2ToF8x2Op : NVVM_Op<"convert.f32x2.to.f8x2"> { DefaultValuedAttr:$sat, DefaultValuedAttr:$relu, TypeAttr:$dstTy); - let assemblyFormat = "$a `,` $b attr-dict `:` type($dst) `(` $dstTy `)`"; + let assemblyFormat = "$a `,` $b prop-dict attr-dict `:` type($dst) `(` $dstTy `)`"; let extraClassDeclaration = [{ static llvm::Intrinsic::ID getIntrinsicID(mlir::Type dstTy, @@ -2214,7 +2216,7 @@ def NVVM_ConvertF16x2ToF8x2Op : NVVM_Op<"convert.f16x2.to.f8x2"> { VectorOfLengthAndType<[2], [F16]>:$a, DefaultValuedAttr:$relu, TypeAttr:$dstTy); - let assemblyFormat = "$a attr-dict `:` type($a) `->` type($dst) `(` $dstTy `)`"; + let assemblyFormat = "$a prop-dict attr-dict `:` type($a) `->` type($dst) `(` $dstTy `)`"; let extraClassDeclaration = [{ static llvm::Intrinsic::ID getIntrinsicID(mlir::Type dstTy, @@ -2258,7 +2260,7 @@ def NVVM_ConvertBF16x2ToF8x2Op : NVVM_Op<"convert.bf16x2.to.f8x2"> { DefaultValuedAttr:$sat, DefaultValuedAttr:$relu, TypeAttrOf>:$dstTy); - let assemblyFormat = "$src attr-dict `:` type($src) `->` type($dst) `(` $dstTy `)`"; + let assemblyFormat = "$src prop-dict attr-dict `:` type($src) `->` type($dst) `(` $dstTy `)`"; let extraClassDeclaration = [{ static llvm::Intrinsic::ID getIntrinsicID(mlir::Type dstTy, @@ -2300,7 +2302,7 @@ class NVVM_ConvertToFP16x2Op_Base { Optional:$scaleFactor, DefaultValuedAttr:$relu); let assemblyFormat = - "$a `,` $b (`,` $scaleFactor^)? attr-dict `:` type($dst)"; + "$a `,` $b (`,` $scaleFactor^)? prop-dict attr-dict `:` type($dst)"; let extraClassDeclaration = [{ static IDArgPair getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder); @@ -2382,7 +2384,7 @@ def NVVM_ConvertBF16x2ToS2F6x2Op : NVVM_Op<"convert.bf16x2.to.s2f6x2"> { Optional:$scaleFactor, DefaultValuedAttr:$relu); let assemblyFormat = - "$src (`,` $scaleFactor^)? attr-dict `:` type($src) `->` type($dst)"; + "$src (`,` $scaleFactor^)? prop-dict attr-dict `:` type($src) `->` type($dst)"; let extraClassDeclaration = [{ static IDArgPair getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder); @@ -2421,7 +2423,7 @@ def NVVM_ConvertS2F6x2ToBF16x2Op : NVVM_SingleResultIntrinsicOp<"convert.s2f6x2. DefaultValuedAttr:$sat, DefaultValuedAttr:$relu); let assemblyFormat = - "$src (`,` $scaleFactor^)? attr-dict `:` type($src) `->` type($dst)"; + "$src (`,` $scaleFactor^)? prop-dict attr-dict `:` type($src) `->` type($dst)"; } //===----------------------------------------------------------------------===// @@ -2458,7 +2460,7 @@ class NVVM_ConvertF32x2ToFPx2OpBase, ``` }]; - let assemblyFormat = "$ptr `,` $stride attr-dict `:` functional-type($ptr, $res)"; + let assemblyFormat = "$ptr `,` $stride prop-dict attr-dict `:` functional-type($ptr, $res)"; let hasVerifier = 1; } @@ -3167,7 +3169,7 @@ def NVVM_WMMAStoreOp : NVVM_Op<"wmma.store">, }]; let assemblyFormat = [{ - $ptr `,` $stride `,` $args attr-dict `:` qualified(type($ptr)) `,` + $ptr `,` $stride `,` $args prop-dict attr-dict `:` qualified(type($ptr)) `,` type($args) }]; let hasVerifier = 1; @@ -3223,7 +3225,7 @@ def NVVM_WMMAMmaOp : NVVM_Op<"wmma.mma">, ``` }]; - let assemblyFormat = "$args attr-dict `:` functional-type($args, $res)"; + let assemblyFormat = "$args prop-dict attr-dict `:` functional-type($args, $res)"; let hasVerifier = 1; } @@ -3263,7 +3265,7 @@ def NVVM_StMatrixOp: NVVM_Op<"stmatrix">, auto intId = getStMatrixIntrinsicId($layout, $sources.size(), $shape, $eltType); createIntrinsicCall(builder, intId, operands, operands[0]->getType()); }]; - let assemblyFormat = "$ptr `,` $sources attr-dict `:` type(operands)"; + let assemblyFormat = "$ptr `,` $sources prop-dict attr-dict `:` type(operands)"; let hasVerifier = 1; } @@ -3310,7 +3312,7 @@ def NVVM_LdMatrixOp: NVVM_Op<"ldmatrix", [InferTypeOpAdaptor]>, ``` }]; - let assemblyFormat = "$ptr attr-dict `:` functional-type($ptr, $res)"; + let assemblyFormat = "$ptr prop-dict attr-dict `:` functional-type($ptr, $res)"; let hasVerifier = 1; } @@ -3340,7 +3342,7 @@ def NVVM_MovMatrixOp DefaultValuedAttr:$layout, LdStMatrixEltTypeAttr:$eltType); - let assemblyFormat = "$src attr-dict `:` type($src)"; + let assemblyFormat = "$src prop-dict attr-dict `:` type($src)"; let hasVerifier = 1; } @@ -4209,7 +4211,7 @@ def CTAGroupKindAttr : def NVVM_CpAsyncBulkCommitGroupOp : NVVM_Op<"cp.async.bulk.commit.group">, Arguments<(ins )> { - let assemblyFormat = "attr-dict"; + let assemblyFormat = "prop-dict attr-dict"; let description = [{ This Op commits all prior initiated but uncommitted cp.async.bulk instructions into a cp.async.bulk-group. @@ -4226,7 +4228,7 @@ def NVVM_CpAsyncBulkWaitGroupOp : NVVM_Op<"cp.async.bulk.wait_group", [NVVMRequi Arguments<(ins ConfinedAttr]>:$group, OptionalAttr:$read)> { - let assemblyFormat = "$group attr-dict"; + let assemblyFormat = "$group prop-dict attr-dict"; let description = [{ Op waits for completion of the most recent bulk async-groups. @@ -4294,7 +4296,7 @@ def NVVM_CpAsyncBulkTensorGlobalToSharedClusterOp : (`multicast_mask` `=` $multicastMask^ )? (`l2_cache_hint` `=` $l2CacheHint^ )? (`predicate` `=` $predicate^)? - attr-dict `:` type($dstMem) `,` type($tmaDescriptor) + prop-dict attr-dict `:` type($dstMem) `,` type($tmaDescriptor) }]; let extraClassDefinition = [{ @@ -4374,7 +4376,7 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp : `box` `[`$coordinates `]` (`l2_cache_hint` `=` $l2CacheHint^ )? (`,` `predicate` `=` $predicate^)? - attr-dict `:` type($tmaDescriptor) `,` type($srcMem) + prop-dict attr-dict `:` type($tmaDescriptor) `,` type($srcMem) }]; let extraClassDeclaration = [{ @@ -4464,7 +4466,7 @@ def NVVM_PrefetchOp : NVVM_Op<"prefetch", UnitAttr:$tensormap, UnitAttr:$uniform, UnitAttr:$in_param_space); - let assemblyFormat = "(`level` `=` $cacheLevel^ (`uniform` $uniform^)? `,`)? (`tensormap` $tensormap^ (`in_param_space` $in_param_space^)? `,`)? (`evict_priority` `=` $evictPriority^ `,`)? $addr (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)"; + let assemblyFormat = "(`level` `=` $cacheLevel^ (`uniform` $uniform^)? `,`)? (`tensormap` $tensormap^ (`in_param_space` $in_param_space^)? `,`)? (`evict_priority` `=` $evictPriority^ `,`)? $addr (`,` `predicate` `=` $predicate^)? prop-dict attr-dict `:` type(operands)"; let hasVerifier = 1; let extraClassDeclaration = [{ @@ -4513,7 +4515,7 @@ def NVVM_CpAsyncBulkPrefetchOp : NVVM_VoidIntrinsicOp<"cp.async.bulk.prefetch"> let assemblyFormat = [{ $srcMem `,` $size (`l2_cache_hint` `=` $l2CacheHint^ )? - attr-dict `:` type($srcMem) + prop-dict attr-dict `:` type($srcMem) }]; } @@ -4542,7 +4544,7 @@ def NVVM_CpAsyncBulkTensorPrefetchOp : `box` `[`$coordinates `]` (`im2col` `[` $im2colOffsets^ `]` )? (`l2_cache_hint` `=` $l2CacheHint^ )? - attr-dict `:` type($tmaDescriptor) + prop-dict attr-dict `:` type($tmaDescriptor) }]; let hasVerifier = 1; @@ -4599,7 +4601,7 @@ def NVVM_CpAsyncBulkTensorReduceOp : $srcMem `,` `box` `[`$coordinates `]` (`l2_cache_hint` `=` $l2CacheHint^ )? - attr-dict `:` type($tmaDescriptor) `,` type($srcMem) + prop-dict attr-dict `:` type($tmaDescriptor) `,` type($srcMem) }]; let extraClassDeclaration = [{ @@ -4649,7 +4651,7 @@ def NVVM_CpAsyncBulkGlobalToSharedClusterOp : $dstMem `,` $srcMem `,` $mbar `,` $size (`multicast_mask` `=` $multicastMask^ )? (`l2_cache_hint` `=` $l2CacheHint^ )? - attr-dict `:` type($dstMem) `,` type($srcMem) + prop-dict attr-dict `:` type($dstMem) `,` type($srcMem) }]; let hasVerifier = 1; @@ -4684,7 +4686,7 @@ def NVVM_CpAsyncBulkSharedCTAToSharedClusterOp : let assemblyFormat = [{ $dstMem `,` $srcMem `,` $mbar `,` $size - attr-dict `:` type($dstMem) `,` type($srcMem) + prop-dict attr-dict `:` type($dstMem) `,` type($srcMem) }]; string llvmBuilder = [{ @@ -4740,7 +4742,7 @@ def NVVM_CpAsyncBulkSharedCTAToGlobalOp : $dstMem `,` $srcMem `,` $size (`l2_cache_hint` `=` $l2CacheHint^ )? (`byte_mask` `=` $byteMask^ )? - attr-dict `:` type($dstMem) `,` type($srcMem) + prop-dict attr-dict `:` type($dstMem) `,` type($srcMem) }]; let extraClassDeclaration = [{ @@ -4767,14 +4769,14 @@ def NVVM_WgmmaFenceAlignedOp : NVVM_Op<"wgmma.fence.aligned", [NVVMRequiresSMa<[ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#asynchronous-warpgroup-level-matrix-instructions-wgmma-fence) }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "prop-dict attr-dict"; string llvmBuilder = [{ createIntrinsicCall(builder, llvm::Intrinsic::nvvm_wgmma_fence_sync_aligned); }]; } def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", [NVVMRequiresSMa<[90]>]> { - let assemblyFormat = "attr-dict"; + let assemblyFormat = "prop-dict attr-dict"; let description = [{ Commits all prior uncommitted warpgroup level matrix multiplication operations. @@ -4787,7 +4789,7 @@ def NVVM_WgmmaGroupSyncAlignedOp : NVVM_Op<"wgmma.commit.group.sync.aligned", [N def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned", [NVVMRequiresSMa<[90]>]> { let arguments = (ins I64Attr:$group); - let assemblyFormat = "attr-dict $group"; + let assemblyFormat = "prop-dict attr-dict $group"; let description = [{ Signal the completion of a preceding warpgroup operation. @@ -4874,7 +4876,7 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async", `D` `[` $typeD `,` $scaleD (`,` $satfinite^)? `]` `,` `A` `[` $typeA `,` $scaleA `,` $layoutA `]` `,` `B` `[` $typeB `,` $scaleB `,` $layoutB `]` - attr-dict `:` + prop-dict attr-dict `:` type($inouts) `->` type($results) }]; @@ -4974,7 +4976,7 @@ def NVVM_GriddepcontrolOp : NVVM_Op<"griddepcontrol", []> { let arguments = (ins GridDepActionAttr:$kind); - let assemblyFormat = "$kind attr-dict"; + let assemblyFormat = "$kind prop-dict attr-dict"; string llvmBuilder = [{ llvm::Intrinsic::ID id; @@ -5010,7 +5012,7 @@ def NVVM_MapaOp: NVVM_Op<"mapa", $res = createIntrinsicCall(builder, intId, {$a, $b}); }]; - let assemblyFormat = "$a`,` $b attr-dict `:` type($a) `->` type($res)"; + let assemblyFormat = "$a`,` $b prop-dict attr-dict `:` type($a) `->` type($res)"; } //===----------------------------------------------------------------------===// @@ -5055,7 +5057,7 @@ def NVVM_MatchSyncOp : NVVM_Op<"match.sync", [InferTypeOpAdaptor]>, $res = createIntrinsicCall(builder, intId, {$thread_mask, $val}); }]; - let assemblyFormat = "$kind $thread_mask `,` $val attr-dict `:` type($val) `->` type($res)"; + let assemblyFormat = "$kind $thread_mask `,` $val prop-dict attr-dict `:` type($val) `->` type($res)"; let hasVerifier = 1; } @@ -5084,7 +5086,7 @@ def NVVM_BulkStoreOp: NVVM_Op<"st.bulk"> { {$addr, $size, builder.getInt64($initVal)}); }]; - let assemblyFormat = "$addr `,` `size` `=` $size (`,` `init` `=` $initVal^)? attr-dict `:` type($addr)"; + let assemblyFormat = "$addr `,` `size` `=` $size (`,` `init` `=` $initVal^)? prop-dict attr-dict `:` type($addr)"; let hasVerifier = 1; } @@ -5099,7 +5101,7 @@ def NVVM_Exit : NVVM_Op<"exit"> { createIntrinsicCall(builder, llvm::Intrinsic::nvvm_exit); }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "prop-dict attr-dict"; } @@ -5117,7 +5119,7 @@ def NVVM_Breakpoint : NVVM_Op<"breakpoint"> { createIntrinsicCall(builder, llvm::Intrinsic::debugtrap); }]; - let assemblyFormat = "attr-dict"; + let assemblyFormat = "prop-dict attr-dict"; } //===----------------------------------------------------------------------===// @@ -5162,7 +5164,7 @@ def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc", [NVVMRequiresSMf<[100, 101, 1 I32:$nCols, DefaultValuedAttr:$group); - let assemblyFormat = "$addr `,` $nCols attr-dict `:` type(operands)"; + let assemblyFormat = "$addr `,` $nCols prop-dict attr-dict `:` type(operands)"; let extraClassDeclaration = [{ static llvm::Intrinsic::ID @@ -5190,7 +5192,7 @@ def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc", [NVVMRequiresSMf<[100, 10 let arguments = (ins LLVM_PointerTensor:$taddr, I32:$nCols, DefaultValuedAttr:$group); - let assemblyFormat = "$taddr `,` $nCols attr-dict `:` type(operands)"; + let assemblyFormat = "$taddr `,` $nCols prop-dict attr-dict `:` type(operands)"; let extraClassDeclaration = [{ static llvm::Intrinsic::ID @@ -5219,7 +5221,7 @@ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_perm let arguments = (ins DefaultValuedAttr:$group); - let assemblyFormat = "attr-dict"; + let assemblyFormat = "prop-dict attr-dict"; string llvmBuilder = [{ auto id = ($group == NVVM::CTAGroupKind::CTA_1) ? @@ -5241,7 +5243,7 @@ def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence", [NVVMRequiresSMf<[100, 101, 1 }]; let arguments = (ins Tcgen05FenceKindAttr:$kind); - let assemblyFormat = "$kind attr-dict"; + let assemblyFormat = "$kind prop-dict attr-dict"; string llvmBuilder = [{ auto id = ($kind == NVVM::Tcgen05FenceKind::BEFORE_THREAD_SYNC) @@ -5263,7 +5265,7 @@ def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait", [NVVMRequiresSMf<[100, 101, 110 }]; let arguments = (ins Tcgen05WaitKindAttr:$kind); - let assemblyFormat = "$kind attr-dict"; + let assemblyFormat = "$kind prop-dict attr-dict"; string llvmBuilder = [{ auto id = ($kind == NVVM::Tcgen05WaitKind::LOAD) @@ -5294,7 +5296,7 @@ def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit", [NVVMRequiresSMf<[100, 101, let assemblyFormat = [{ $addr (`,` `multicast_mask` `=` $multicastMask^)? - attr-dict `:` type(operands) + prop-dict attr-dict `:` type(operands) }]; let extraClassDeclaration = [{ @@ -5326,7 +5328,7 @@ def NVVM_Tcgen05ShiftOp : NVVM_Op<"tcgen05.shift", let arguments = (ins LLVM_PointerTensor:$taddr, DefaultValuedAttr:$group); - let assemblyFormat = "$taddr attr-dict `:` type(operands)"; + let assemblyFormat = "$taddr prop-dict attr-dict `:` type(operands)"; string llvmBuilder = [{ auto id = ($group == NVVM::CTAGroupKind::CTA_1) ? @@ -5407,7 +5409,7 @@ def NVVM_Tcgen05CpOp : NVVM_Op<"tcgen05.cp", [NVVMRequiresSMf<[100, 101, 110]>]> LLVM_PointerTensor:$taddr, I64:$smem_desc); - let assemblyFormat = "$taddr`,` $smem_desc attr-dict"; + let assemblyFormat = "$taddr`,` $smem_desc prop-dict attr-dict"; let hasVerifier = 1; let extraClassDeclaration = [{ @@ -5473,7 +5475,7 @@ def NVVM_Tcgen05MmaSmemDescOp : NVVM_Op<"tcgen05.mma_smem_desc", []> { let results = (outs I64:$res); let assemblyFormat = [{ - `(` operands `)` attr-dict `:` `(` type(operands) `)` `->` type($res) + `(` operands `)` prop-dict attr-dict `:` `(` type(operands) `)` `->` type($res) }]; let extraClassDeclaration = [{ @@ -5529,7 +5531,7 @@ def NVVM_Tcgen05LdOp : NVVM_Op<"tcgen05.ld", [NVVMRequiresSMf<[100, 101, 110]>]> [2, 4, 8, 16, 32, 64, 128], [I32]>]>:$res); let assemblyFormat = [{ - $tmemAddr (`,` $offset^)? (`pack` $pack^)? attr-dict `:` type($res) + $tmemAddr (`,` $offset^)? (`pack` $pack^)? prop-dict attr-dict `:` type($res) }]; let description = [{ @@ -5623,7 +5625,7 @@ def NVVM_Tcgen05LdRedOp : NVVM_Op<"tcgen05.ld.red", AnyTypeOf<[I32, F32]>:$redVal); let assemblyFormat = [{ - $op $addr (`,` $offset^)? attr-dict `:` type($data) `,` type($redVal) + $op $addr (`,` $offset^)? prop-dict attr-dict `:` type($data) `,` type($redVal) }]; let description = [{ @@ -5710,7 +5712,7 @@ def NVVM_Tcgen05StOp : NVVM_Op<"tcgen05.st", [NVVMRequiresSMf<[100, 101, 110]>]> ); let assemblyFormat = [{ - $tmemAddr `,` $val (`,` $offset^)? (`unpack` $unpack^)? attr-dict `:` type($val) + $tmemAddr `,` $val (`,` $offset^)? (`unpack` $unpack^)? prop-dict attr-dict `:` type($val) }]; let description = [{ @@ -5831,7 +5833,7 @@ def NVVM_DotAccumulate4WayOp : NVVM_SingleResultIntrinsicOp<"dot.accumulate.4way let results = (outs I32:$res); - let assemblyFormat = "$a $a_type `,` $b $b_type `,` $c attr-dict `:` type($a) `,` type($b)"; + let assemblyFormat = "$a $a_type `,` $b $b_type `,` $c prop-dict attr-dict `:` type($a) `,` type($b)"; } def NVVM_DotAccumulate2WayOp : NVVM_SingleResultIntrinsicOp<"dot.accumulate.2way"> { @@ -5873,7 +5875,7 @@ def NVVM_DotAccumulate2WayOp : NVVM_SingleResultIntrinsicOp<"dot.accumulate.2way let results = (outs I32:$res); - let assemblyFormat = "$a $a_type `,` $b $b_type `,` $c attr-dict `:` type($a) `,` type($b)"; + let assemblyFormat = "$a $a_type `,` $b $b_type `,` $c prop-dict attr-dict `:` type($a) `,` type($b)"; } //===----------------------------------------------------------------------===// @@ -5906,7 +5908,7 @@ def NVVM_ClusterLaunchControlTryCancelOp LLVM_PointerShared: $smemAddress, LLVM_PointerShared: $mbarrier); - let assemblyFormat = "(`multicast` $multicast^ `,`)? $smemAddress `,` $mbarrier attr-dict"; + let assemblyFormat = "(`multicast` $multicast^ `,`)? $smemAddress `,` $mbarrier prop-dict attr-dict"; let extraClassDeclaration = [{ static mlir::NVVM::IDArgPair @@ -5969,7 +5971,7 @@ def NVVM_ClusterLaunchControlQueryCancelOp I128:$try_cancel_response); let results = (outs AnyTypeOf<[I1, I32]>:$res); - let assemblyFormat = "`query` `=` $query_type `,` $try_cancel_response attr-dict `:` type($res)"; + let assemblyFormat = "`query` `=` $query_type `,` $try_cancel_response prop-dict attr-dict `:` type($res)"; let hasVerifier = 1; @@ -6139,7 +6141,7 @@ def NVVM_Tcgen05MMAOp : NVVM_Op<"tcgen05.mma", let assemblyFormat = [{ $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD (`scale` `=` $scaleInputD^)? - (`mask` `=` $disableOutputLane^)? attr-dict `:` `(` type(operands) `)` + (`mask` `=` $disableOutputLane^)? prop-dict attr-dict `:` `(` type(operands) `)` }]; let hasVerifier = true; @@ -6202,7 +6204,7 @@ def NVVM_Tcgen05MMASparseOp : NVVM_Op<"tcgen05.mma.sp", ); let assemblyFormat = [{ - $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $sparseMetadata (`scale` `=` $scaleInputD^)? (`mask` `=` $disableOutputLane^)? attr-dict `:` `(` type(operands) `)` + $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $sparseMetadata (`scale` `=` $scaleInputD^)? (`mask` `=` $disableOutputLane^)? prop-dict attr-dict `:` `(` type(operands) `)` }]; let hasVerifier = true; @@ -6293,7 +6295,7 @@ def NVVM_Tcgen05MMABlockScaleOp : NVVM_Op<"tcgen05.mma.block_scale", let assemblyFormat = [{ $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` - $scaleA `,` $scaleB attr-dict `:` `(` type(operands) `)` + $scaleA `,` $scaleB prop-dict attr-dict `:` `(` type(operands) `)` }]; let hasVerifier = true; @@ -6357,7 +6359,7 @@ def NVVM_Tcgen05MMASparseBlockScaleOp : NVVM_Op<"tcgen05.mma.sp.block_scale", let assemblyFormat = [{ $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` - $sparseMetadata `,` $scaleA `,` $scaleB attr-dict `:` `(` type(operands) `)` + $sparseMetadata `,` $scaleA `,` $scaleB prop-dict attr-dict `:` `(` type(operands) `)` }]; let hasVerifier = true; @@ -6447,7 +6449,7 @@ def NVVM_Tcgen05MMAWsOp : NVVM_Op<"tcgen05.mma.ws", let assemblyFormat = [{ $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD (`,` $zeroColMask^)? - attr-dict `:` `(` type(operands) `)` + prop-dict attr-dict `:` `(` type(operands) `)` }]; let extraClassDeclaration = [{ @@ -6507,7 +6509,7 @@ def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp", ); let assemblyFormat = [{ - $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $sparseMetadata (`,` $zeroColMask^)? attr-dict `:` `(` type(operands) `)` + $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $sparseMetadata (`,` $zeroColMask^)? prop-dict attr-dict `:` `(` type(operands) `)` }]; let extraClassDeclaration = [{ @@ -6542,7 +6544,7 @@ class NVVM_FloatBinaryOp traits = []> : DefaultValuedAttr:$sat, DefaultValuedAttr:$ftz)>, Results<(outs SIMTFloatType:$res)> { - let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($res)"; + let assemblyFormat = "$lhs `,` $rhs prop-dict attr-dict `:` type($res)"; } def NVVM_AddFOp : NVVM_FloatBinaryOp<"addf", [Commutative]> { @@ -6633,7 +6635,7 @@ def NVVM_FmaOp : NVVM_Op<"fma", [Pure, SameOperandsAndResultType]> { DefaultValuedAttr:$oob ); let results = (outs SIMTFloatType:$res); - let assemblyFormat = "$a `,` $b `,` $c attr-dict `:` type($a)"; + let assemblyFormat = "$a `,` $b `,` $c prop-dict attr-dict `:` type($a)"; let hasVerifier = 1; let extraClassDeclaration = [{ @@ -6831,7 +6833,7 @@ def NVVM_TensormapReplaceOp : NVVM_VoidIntrinsicOp<"tensormap.replace", ); let assemblyFormat = [{ - `field` `=` $field (`[` $ord^ `]`)? `,` `new_value` `=` ($new_value_attr^):($new_value)? `in` $addr attr-dict `:` type(operands) + `field` `=` $field (`[` $ord^ `]`)? `,` `new_value` `=` ($new_value_attr^):($new_value)? `in` $addr prop-dict attr-dict `:` type(operands) }]; } diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index d5aad8321cb9f..ae42558594d1d 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -128,7 +128,7 @@ gpu.module @test_module_4 { // CHECK: %[[#NUM_LANES:]] = llvm.sub %[[#THIRTY_TWO]], %[[#WIDTH]] : i32 // CHECK: %[[#MASK:]] = llvm.lshr %[[#MINUS_ONE]], %[[#NUM_LANES]] : i32 // CHECK: %[[#CLAMP:]] = llvm.sub %[[#WIDTH]], %[[#ONE]] : i32 - // CHECK: %[[#SHFL:]] = nvvm.shfl.sync bfly %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + // CHECK: %[[#SHFL:]] = nvvm.shfl.sync bfly %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> // CHECK: llvm.extractvalue %[[#SHFL]][0] : !llvm.struct<(f32, i1)> // CHECK: llvm.extractvalue %[[#SHFL]][1] : !llvm.struct<(f32, i1)> %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : f32 @@ -137,13 +137,13 @@ gpu.module @test_module_4 { // CHECK: %[[#THIRTY_TWO:]] = llvm.mlir.constant(32 : i32) : i32 // CHECK: %[[#NUM_LANES:]] = llvm.sub %[[#THIRTY_TWO]], %[[#WIDTH]] : i32 // CHECK: %[[#MASK:]] = llvm.lshr %[[#MINUS_ONE]], %[[#NUM_LANES]] : i32 - // CHECK: %[[#SHFL:]] = nvvm.shfl.sync up %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#NUM_LANES]] {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + // CHECK: %[[#SHFL:]] = nvvm.shfl.sync up %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#NUM_LANES]] <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> // CHECK: llvm.extractvalue %[[#SHFL]][0] : !llvm.struct<(f32, i1)> // CHECK: llvm.extractvalue %[[#SHFL]][1] : !llvm.struct<(f32, i1)> %shflu, %predu = gpu.shuffle up %arg0, %arg1, %arg2 : f32 - // CHECK: nvvm.shfl.sync down {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + // CHECK: nvvm.shfl.sync down {{.*}} <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> %shfld, %predd = gpu.shuffle down %arg0, %arg1, %arg2 : f32 - // CHECK: nvvm.shfl.sync idx {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + // CHECK: nvvm.shfl.sync idx {{.*}} <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> %shfli, %predi = gpu.shuffle idx %arg0, %arg1, %arg2 : f32 func.return %shfl, %shflu, %shfld, %shfli, %pred, %predu, %predd, %predi diff --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir index a0801443057ea..158bdc8f9a335 100644 --- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir @@ -21,7 +21,7 @@ gpu.module @test_module { // CHECK: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f16 // CHECK: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK: %[[FRAG:.*]] = nvvm.wmma.load %[[ADDRESS]], %[[LDM32]] - // CHECK-SAME: {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + // CHECK-SAME: <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.return %[[FRAG]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: %[[INX:.*]] = llvm.mlir.constant(16 : index) : i32 @@ -33,7 +33,7 @@ gpu.module @test_module { // CHECK32: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr<3>, i32) -> !llvm.ptr<3>, f16 // CHECK32: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK32: %[[FRAG:.*]] = nvvm.wmma.load %[[ADDRESS]], %[[LDM32]] - // CHECK32-SAME: {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + // CHECK32-SAME: <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: llvm.return %[[FRAG]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> return %0 : !gpu.mma_matrix<16x16xf16, "AOp"> } @@ -60,7 +60,7 @@ gpu.module @test_module { // CHECK: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i8 // CHECK: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK: %[[FRAG:.*]] = nvvm.wmma.load %[[ADDRESS]], %[[LDM32]] - // CHECK-SAME: {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + // CHECK-SAME: <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> // CHECK: llvm.return %[[FRAG]] : !llvm.struct<(i32, i32)> // CHECK32: %[[INX:.*]] = llvm.mlir.constant(16 : index) : i32 @@ -72,7 +72,7 @@ gpu.module @test_module { // CHECK32: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr<3>, i32) -> !llvm.ptr<3>, i8 // CHECK32: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK32: %[[FRAG:.*]] = nvvm.wmma.load %[[ADDRESS]], %[[LDM32]] - // CHECK32-SAME: {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + // CHECK32-SAME: <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> // CHECK32: llvm.return %[[FRAG]] : !llvm.struct<(i32, i32)> return %0 : !gpu.mma_matrix<16x16xsi8, "AOp"> } @@ -95,7 +95,7 @@ gpu.module @test_module { // CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64 // CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f64 // CHECK: %[[C32_I32:.*]] = llvm.mlir.constant(32 : index) : i32 - // CHECK: %[[LOAD:.*]] = nvvm.wmma.load %[[GEP]], %[[C32_I32]] {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32} : (!llvm.ptr<3>) -> f64 + // CHECK: %[[LOAD:.*]] = nvvm.wmma.load %[[GEP]], %[[C32_I32]] <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32}> : (!llvm.ptr<3>) -> f64 // CHECK: llvm.return %[[LOAD]] : f64 } } @@ -129,7 +129,7 @@ gpu.module @test_module { // CHECK: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, f16 // CHECK: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK: nvvm.wmma.store %[[ADDRESS]], %[[LDM32]], %[[EL1]], %[[EL2]], %[[EL3]], %[[EL4]] - // CHECK-SAME: {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : !llvm.ptr<3>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> + // CHECK-SAME: <{eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : !llvm.ptr<3>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> // CHECK: llvm.return // CHECK32: %[[INX:.*]] = llvm.mlir.constant(16 : index) : i32 @@ -148,7 +148,7 @@ gpu.module @test_module { // CHECK32: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr<3>, i32) -> !llvm.ptr<3>, f16 // CHECK32: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK32: nvvm.wmma.store %[[ADDRESS]], %[[LDM32]], %[[EL1]], %[[EL2]], %[[EL3]], %[[EL4]] - // CHECK32-SAME: {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : !llvm.ptr<3>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> + // CHECK32-SAME: <{eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : !llvm.ptr<3>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> // CHECK32: llvm.return return } @@ -183,7 +183,7 @@ gpu.module @test_module { // CHECK: %[[C3:.*]] = llvm.extractvalue %[[C]][2] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[C4:.*]] = llvm.extractvalue %[[C]][3] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[RES:.*]] = nvvm.wmma.mma %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[A8]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[B8]], %[[C1]], %[[C2]], %[[C3]], %[[C4]] - // CHECK-SAME: {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : ( + // CHECK-SAME: <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : ( // CHECK-SAME: vector<2xf16>, {{.*}}) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.return %[[RES]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> return %D : !gpu.mma_matrix<16x16xf16, "COp"> @@ -212,7 +212,7 @@ gpu.module @test_module { // CHECK: %[[C7:.*]] = llvm.extractvalue %[[C]][6] : !llvm.struct<(i32, i32, i32, i32, i32, i32, i32, i32)> // CHECK: %[[C8:.*]] = llvm.extractvalue %[[C]][7] : !llvm.struct<(i32, i32, i32, i32, i32, i32, i32, i32)> // CHECK: %[[RES:.*]] = nvvm.wmma.mma %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[B1]], %[[C1]], %[[C2]], %[[C3]], %[[C4]], %[[C5]], %[[C6]], %[[C7]], %[[C8]] - // CHECK-SAME: {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 32 : i32, n = 8 : i32} : ( + // CHECK-SAME: <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 32 : i32, n = 8 : i32}> : ( // CHECK-SAME: i32, {{.*}}) -> !llvm.struct<(i32, i32, i32, i32, i32, i32, i32, i32)> // CHECK: llvm.return %[[RES]] : !llvm.struct<(i32, i32, i32, i32, i32, i32, i32, i32)> return %D : !gpu.mma_matrix<32x8xi32, "COp"> @@ -224,13 +224,13 @@ gpu.module @test_module { gpu.module @test_module { // CHECK-LABEL: func @gpu_wmma_mma_loop_op -// CHECK: %[[C:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[C:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.br ^bb1(%{{.*}}, %[[C]] : i64, !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) // CHECK: ^bb1(%{{.*}}: i64, %[[ACC:.+]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>): // 2 preds: ^bb0, ^bb2 // CHECK: llvm.cond_br %{{.*}}, ^bb2, ^bb3 // CHECK: ^bb2: // pred: ^bb1 -// CHECK: %[[A:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> -// CHECK: %[[B:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[A:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[B:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[A0:.+]] = llvm.extractvalue %[[A]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[A1:.+]] = llvm.extractvalue %[[A]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[A2:.+]] = llvm.extractvalue %[[A]][2] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> @@ -251,14 +251,14 @@ gpu.module @test_module { // CHECK: %[[ACC1:.+]] = llvm.extractvalue %[[ACC]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[ACC2:.+]] = llvm.extractvalue %[[ACC]][2] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[ACC3:.+]] = llvm.extractvalue %[[ACC]][3] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> -// CHECK: %[[ACC_MUL:.+]] = nvvm.wmma.mma %[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[B0]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[ACC0]], %[[ACC1]], %[[ACC2]], %[[ACC3]] {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (vector<2xf16>, {{.*}} -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[ACC_MUL:.+]] = nvvm.wmma.mma %[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[B0]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[ACC0]], %[[ACC1]], %[[ACC2]], %[[ACC3]] <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (vector<2xf16>, {{.*}} -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.br ^bb1(%{{.*}}, %[[ACC_MUL]] : i64, !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) // CHECK: ^bb3: // pred: ^bb1 // CHECK: %[[E0:.+]] = llvm.extractvalue %[[ACC]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[E1:.+]] = llvm.extractvalue %[[ACC]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[E2:.+]] = llvm.extractvalue %[[ACC]][2] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[E3:.+]] = llvm.extractvalue %[[ACC]][3] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> -// CHECK: nvvm.wmma.store %{{.*}}, %{{.*}}, %[[E0]], %[[E1]], %[[E2]], %[[E3]] {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> +// CHECK: nvvm.wmma.store %{{.*}}, %{{.*}}, %[[E0]], %[[E1]], %[[E2]], %[[E3]] <{eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> func.func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) { %c0 = arith.constant 0 : index diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 6b7fd1578c35a..0d799d7913e41 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -182,7 +182,7 @@ func.func @m8n8k4_f64(%arg0: vector<1x1xf64>, %arg1: vector<1x1xf64>, %arg2: vec // CHECK-LABEL: @ldmatrix_x4 func.func @ldmatrix_x4(%arg0: memref<128x128xf16, 3>) -> vector<4x2xf16> { %c0 = arith.constant 0 : index - // CHECK: nvvm.ldmatrix {{%.+}} {eltType = #nvvm.ld_st_matrix_elt_type, layout = #nvvm.mma_layout, num = 4 : i32, shape = #nvvm.ld_st_matrix_shape} : {{.*}} -> !llvm.struct<(i32, i32, i32, i32)> + // CHECK: nvvm.ldmatrix {{%.+}} <{eltType = #nvvm.ld_st_matrix_elt_type, layout = #nvvm.mma_layout, num = 4 : i32, shape = #nvvm.ld_st_matrix_shape}> : {{.*}} -> !llvm.struct<(i32, i32, i32, i32)> %a = nvgpu.ldmatrix %arg0[%c0, %c0] {transpose = false, numTiles = 4 : i32} : memref<128x128xf16, 3> -> vector<4x2xf16> // CHECK: llvm.extractvalue // CHECK: llvm.bitcast @@ -202,7 +202,7 @@ func.func @ldmatrix_x4(%arg0: memref<128x128xf16, 3>) -> vector<4x2xf16> { // CHECK-LABEL: @ldmatrix_x1 func.func @ldmatrix_x1(%arg0: memref<128x128xf16, 3>) -> vector<1x2xf16> { %c0 = arith.constant 0 : index - // CHECK: nvvm.ldmatrix {{%.+}} {eltType = #nvvm.ld_st_matrix_elt_type, layout = #nvvm.mma_layout, num = 1 : i32, shape = #nvvm.ld_st_matrix_shape} : {{.*}} -> i32 + // CHECK: nvvm.ldmatrix {{%.+}} <{eltType = #nvvm.ld_st_matrix_elt_type, layout = #nvvm.mma_layout, num = 1 : i32, shape = #nvvm.ld_st_matrix_shape}> : {{.*}} -> i32 %a = nvgpu.ldmatrix %arg0[%c0, %c0] {transpose = false, numTiles = 1 : i32} : memref<128x128xf16, 3> -> vector<1x2xf16> // CHECK: llvm.bitcast // CHECK: llvm.insertvalue diff --git a/mlir/test/Conversion/NVVMToLLVM/invalid.mlir b/mlir/test/Conversion/NVVMToLLVM/invalid.mlir index 9ebe3a009adf2..8c50977544300 100644 --- a/mlir/test/Conversion/NVVMToLLVM/invalid.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/invalid.mlir @@ -152,7 +152,7 @@ func.func @set_max_register() { func.func @fence_proxy() { // expected-error @+1 {{op only async_shared fence can have space attribute}} - nvvm.fence.proxy { kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + nvvm.fence.proxy #nvvm.proxy_kind, space = #nvvm.shared_space func.return } @@ -160,6 +160,6 @@ func.func @fence_proxy() { func.func @fence_proxy() { // expected-error @+1 {{op async_shared fence requires space attribute}} - nvvm.fence.proxy { kind = #nvvm.proxy_kind} + nvvm.fence.proxy #nvvm.proxy_kind func.return } diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index 5a381ce1e679e..9158279988192 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -82,33 +82,33 @@ func.func @async_cp_zfill(%dst: !llvm.ptr<3>, %src: !llvm.ptr<1>, %cpSize: i32) func.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.ptr) { // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr - // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} {noinc = true} - nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr + // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} <{noinc = true}> + nvvm.cp.async.mbarrier.arrive %bar_gen <{noinc = true}> : !llvm.ptr // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> - // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} {noinc = true} - nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> + // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} <{noinc = true}> + nvvm.cp.async.mbarrier.arrive %bar_shared <{noinc = true}> : !llvm.ptr<3> llvm.return } // CHECK-LABEL: @tma_load_3d_all func.func @tma_load_3d_all(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<7>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %off0: i16, %off1: i16, %ctamask : i16, %cacheHint : i64, %p : i1) { // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$9 cp.async.bulk.tensor.3d.shared::cluster.global.mbarrier::complete_tx::bytes.im2col.multicast::cluster.L2::cache_hint [$0], [$1, {$2,$3,$4} ], [$5],{$6}, $7, $8;", "l,l,r,r,r,r,h,h,l,b" - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint predicate = %p {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint predicate = %p <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr return } // CHECK-LABEL: @tma_load_4d_all func.func @tma_load_4d_all(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<7>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %off0: i16, %off1: i16, %ctamask : i16, %cacheHint : i64, %p : i1) { // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$11 cp.async.bulk.tensor.4d.shared::cluster.global.mbarrier::complete_tx::bytes.im2col.multicast::cluster.L2::cache_hint [$0], [$1, {$2,$3,$4,$5} ], [$6],{$7,$8}, $9, $10;", "l,l,r,r,r,r,r,h,h,h,l,b" - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2,%crd3] im2col[%off0,%off1] multicast_mask = %ctamask l2_cache_hint = %cacheHint predicate = %p {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2,%crd3] im2col[%off0,%off1] multicast_mask = %ctamask l2_cache_hint = %cacheHint predicate = %p <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr return } // CHECK-LABEL: @tma_load_5d_all func.func @tma_load_5d_all(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<7>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %crd4: i32, %off0: i16, %off1: i16, %off2: i16, %ctamask : i16, %cacheHint : i64, %p : i1) { // CHECK: lvm.inline_asm has_side_effects asm_dialect = att "@$13 cp.async.bulk.tensor.5d.shared::cluster.global.mbarrier::complete_tx::bytes.im2col.multicast::cluster.L2::cache_hint [$0], [$1, {$2,$3,$4,$5,$6} ], [$7],{$8,$9,$10}, $11, $12;", "l,l,r,r,r,r,r,r,h,h,h,h,l,b" - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2,%crd3,%crd4] im2col[%off0,%off1,%off2] multicast_mask = %ctamask l2_cache_hint = %cacheHint predicate = %p {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2,%crd3,%crd4] im2col[%off0,%off1,%off2] multicast_mask = %ctamask l2_cache_hint = %cacheHint predicate = %p <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr return } @@ -573,12 +573,12 @@ func.func @cp_async_bulk_commit() { func.func @cp_async_bulk_wait_group() { // CHECK: nvvm.cp.async.bulk.wait_group 1 // CHECK: nvvm.cp.async.bulk.wait_group 0 - // CHECK: nvvm.cp.async.bulk.wait_group 5 {read} - // CHECK: nvvm.cp.async.bulk.wait_group 0 {read} + // CHECK: nvvm.cp.async.bulk.wait_group 5 <{read}> + // CHECK: nvvm.cp.async.bulk.wait_group 0 <{read}> nvvm.cp.async.bulk.wait_group 1 nvvm.cp.async.bulk.wait_group 0 - nvvm.cp.async.bulk.wait_group 5 {read} - nvvm.cp.async.bulk.wait_group 0 {read} + nvvm.cp.async.bulk.wait_group 5 <{read}> + nvvm.cp.async.bulk.wait_group 0 <{read}> func.return } diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index cb5148a0ca9ae..917259fce61a1 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -665,21 +665,21 @@ func.func @zero_non_llvm_type() { func.func @nvvm_invalid_shfl_pred_1(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) { // expected-error@+1 {{expected return type to be a two-element struct}} - %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> i32 + %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 <{return_value_and_is_valid}> : i32 -> i32 } // ----- func.func @nvvm_invalid_shfl_pred_2(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) { // expected-error@+1 {{expected return type to be a two-element struct}} - %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32)> + %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 <{return_value_and_is_valid}> : i32 -> !llvm.struct<(i32)> } // ----- func.func @nvvm_invalid_shfl_pred_3(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) { // expected-error@+1 {{expected second element in the returned struct to be of type 'i1' but got 'i32' instead}} - %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i32)> + %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 <{return_value_and_is_valid}> : i32 -> !llvm.struct<(i32, i32)> } // ----- @@ -1101,7 +1101,7 @@ module { llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected source pointer in memory space 0, 1, 3}} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr<5>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } @@ -1111,7 +1111,7 @@ llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32) { llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr<3>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } @@ -1121,7 +1121,7 @@ llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr<3>, %arg1: i32) { llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr<3>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return @@ -1132,7 +1132,7 @@ llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr<3>, %arg1: i32) { llvm.func @wmmaLoadOp_invalid_COp(%arg0: !llvm.ptr<3>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> llvm.return @@ -1145,7 +1145,7 @@ llvm.func @wmmaStoreOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32, %arg4: vector<2 x f16>, %arg5: vector<2 xf16>) { // expected-error@+1 {{'nvvm.wmma.store' op expected operands to be a source pointer in memory space 0, 1, 3}} nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 - {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : !llvm.ptr<5>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> llvm.return } @@ -1164,7 +1164,7 @@ llvm.func @gpu_wmma_mma_op_invalid_operands(%arg0: vector<2 x f16>, %arg1: vecto %arg18: vector<2 x f16>) { // expected-error@+1 {{'nvvm.wmma.mma' op expected 20 arguments}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18 - {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>) -> !llvm.struct<(vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> @@ -1185,7 +1185,7 @@ llvm.func @gpu_wmma_mma_op_results(%arg0: vector<2 x f16>, %arg1: vector<2 x f16 %arg18: vector<2 x f16>, %arg19: vector<2 x f16>) { // expected-error@+1 {{'nvvm.wmma.mma' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19 - {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>) -> !llvm.struct<(vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> llvm.return @@ -1205,7 +1205,7 @@ llvm.func @gpu_wmma_mma_op_invalid_ab_operands(%arg0: vector<2 x f16>, %arg1: ve %arg20: f32, %arg21: f32, %arg22: f32, %arg23: f32) { // expected-error@+1 {{'nvvm.wmma.mma' op expected argument 15 to be of type 'vector<2xf16>'}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 - {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return } @@ -1224,7 +1224,7 @@ llvm.func @gpu_wmma_mma_op_invalid_c_operand(%arg0: vector<2 x f16>, %arg1: vect %arg20: f32, %arg21: f32, %arg22: f32, %arg23: vector<2xf16>) { // expected-error@+1 {{'nvvm.wmma.mma' op expected argument 23 to be of type 'f32'}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 - {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return } @@ -1243,7 +1243,7 @@ llvm.func @gpu_wmma_mma_op_invalid_result(%arg0: vector<2 x f16>, %arg1: vector< %arg20: f32, %arg21: f32, %arg22: f32, %arg23: f32) { // expected-error@+1 {{'nvvm.wmma.mma' op expected destination type is a structure of 8 elements of type 'f32'}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 - {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, vector<2xf16>)> llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/nvvm-transcendentals.mlir b/mlir/test/Dialect/LLVMIR/nvvm-transcendentals.mlir index aa5439a70395e..8fec21b705263 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm-transcendentals.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm-transcendentals.mlir @@ -9,8 +9,8 @@ func.func @nvvm_cos_f32(%arg0: f32) -> f32 { // CHECK-LABEL: @nvvm_cos_ftz_f32 func.func @nvvm_cos_ftz_f32(%arg0: f32) -> f32 { - // CHECK: nvvm.cos {{.*}} {ftz = true} : f32 - %0 = nvvm.cos %arg0 {ftz = true} : f32 + // CHECK: nvvm.cos {{.*}}, ftz = true : f32 + %0 = nvvm.cos %arg0, ftz = true : f32 return %0 : f32 } // RUN: mlir-opt %s -split-input-file | FileCheck %s @@ -24,8 +24,8 @@ func.func @nvvm_sin_f32(%arg0: f32) -> f32 { // CHECK-LABEL: @nvvm_sin_ftz_f32 func.func @nvvm_sin_ftz_f32(%arg0: f32) -> f32 { - // CHECK: nvvm.sin {{.*}} {ftz = true} : f32 - %0 = nvvm.sin %arg0 {ftz = true} : f32 + // CHECK: nvvm.sin {{.*}}, ftz = true : f32 + %0 = nvvm.sin %arg0, ftz = true : f32 return %0 : f32 } @@ -38,8 +38,8 @@ func.func @nvvm_lg2_f32(%arg0: f32) -> f32 { // CHECK-LABEL: @nvvm_lg2_ftz_f32 func.func @nvvm_lg2_ftz_f32(%arg0: f32) -> f32 { - // CHECK: nvvm.log2 {{.*}} {ftz = true} : f32 - %0 = nvvm.log2 %arg0 {ftz = true} : f32 + // CHECK: nvvm.log2 {{.*}}, ftz = true : f32 + %0 = nvvm.log2 %arg0, ftz = true : f32 return %0 : f32 } @@ -52,7 +52,7 @@ func.func @nvvm_ex2_f32(%arg0: f32) -> f32 { // CHECK-LABEL: @nvvm_ex2_ftz_f32 func.func @nvvm_ex2_ftz_f32(%arg0: f32) -> f32 { - // CHECK: nvvm.ex2 {{.*}} {ftz = true} : f32 - %0 = nvvm.ex2 %arg0 {ftz = true} : f32 + // CHECK: nvvm.ex2 {{.*}}, ftz = true : f32 + %0 = nvvm.ex2 %arg0, ftz = true : f32 return %0 : f32 } diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index e3a98cc9cfc34..dc2131a6253fc 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -62,8 +62,8 @@ llvm.func @llvm_nvvm_barrier_arrive(%barId : i32, %numberOfThreads : i32) { func.func @llvm_nvvm_cluster_arrive() { // CHECK: nvvm.cluster.arrive nvvm.cluster.arrive - // CHECK: nvvm.cluster.arrive {aligned} - nvvm.cluster.arrive {aligned} + // CHECK: nvvm.cluster.arrive aligned + nvvm.cluster.arrive aligned llvm.return } @@ -71,8 +71,8 @@ func.func @llvm_nvvm_cluster_arrive() { func.func @llvm_nvvm_cluster_arrive_relaxed() { // CHECK: nvvm.cluster.arrive.relaxed nvvm.cluster.arrive.relaxed - // CHECK: nvvm.cluster.arrive.relaxed {aligned} - nvvm.cluster.arrive.relaxed {aligned} + // CHECK: nvvm.cluster.arrive.relaxed aligned + nvvm.cluster.arrive.relaxed aligned llvm.return } @@ -80,8 +80,8 @@ func.func @llvm_nvvm_cluster_arrive_relaxed() { func.func @llvm_nvvm_cluster_wait() { // CHECK: nvvm.cluster.wait nvvm.cluster.wait - // CHECK: nvvm.cluster.wait {aligned} - nvvm.cluster.wait {aligned} + // CHECK: nvvm.cluster.wait aligned + nvvm.cluster.wait aligned llvm.return } @@ -106,10 +106,10 @@ func.func @nvvm_shfl( func.func @nvvm_shfl_pred( %arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32, %arg4 : f32) -> !llvm.struct<(i32, i1)> { - // CHECK: nvvm.shfl.sync bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> - %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> - // CHECK: nvvm.shfl.sync bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> - %1 = nvvm.shfl.sync bfly %arg0, %arg4, %arg1, %arg2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + // CHECK: nvvm.shfl.sync bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} <{return_value_and_is_valid}> : i32 -> !llvm.struct<(i32, i1)> + %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 <{return_value_and_is_valid}> : i32 -> !llvm.struct<(i32, i1)> + // CHECK: nvvm.shfl.sync bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> + %1 = nvvm.shfl.sync bfly %arg0, %arg4, %arg1, %arg2 <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> llvm.return %0 : !llvm.struct<(i32, i1)> } @@ -128,9 +128,9 @@ func.func @nvvm_vote(%arg0 : i32, %arg1 : i1) -> i32 { // CHECK-LABEL: @nvvm_movmatrix func.func @nvvm_movmatrix(%src : i32) -> i32 { - // CHECK: nvvm.movmatrix %{{.*}} {eltType = #nvvm.ld_st_matrix_elt_type, shape = #nvvm.ld_st_matrix_shape} : i32 - %dst = nvvm.movmatrix %src {shape = #nvvm.ld_st_matrix_shape, - eltType = #nvvm.ld_st_matrix_elt_type} : i32 + // CHECK: nvvm.movmatrix %{{.*}} <{eltType = #nvvm.ld_st_matrix_elt_type, shape = #nvvm.ld_st_matrix_shape}> : i32 + %dst = nvvm.movmatrix %src <{shape = #nvvm.ld_st_matrix_shape, + eltType = #nvvm.ld_st_matrix_elt_type}> : i32 llvm.return %dst : i32 } @@ -322,9 +322,9 @@ func.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, // CHECK-LABEL: @nvvm_wmma_load_tf32 func.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { - // CHECK: nvvm.wmma.load {{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + // CHECK: nvvm.wmma.load {{.*}} <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return %0 : !llvm.struct<(i32, i32, i32, i32)> } @@ -334,9 +334,9 @@ func.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : %6 : i32, %7 : i32, %8 : f32, %9 : f32, %10 : f32, %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> { - // CHECK: nvvm.wmma.mma {{.*}} {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 8 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + // CHECK: nvvm.wmma.mma {{.*}} <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 8 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> %r = nvvm.wmma.mma %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 - {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 8 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 8 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (i32, i32, i32, i32, i32, i32, i32, i32, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %r : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> @@ -380,19 +380,19 @@ llvm.func @redux_sync_f32(%value: f32, %offset: i32) -> f32 { // CHECK: nvvm.redux.sync fmin %{{.*}} %r1 = nvvm.redux.sync fmin %value, %offset: f32 -> f32 // CHECK: nvvm.redux.sync fmin %{{.*}} - %r2 = nvvm.redux.sync fmin %value, %offset {abs = true}: f32 -> f32 + %r2 = nvvm.redux.sync fmin %value, %offset abs = true : f32 -> f32 // CHECK: nvvm.redux.sync fmin %{{.*}} - %r3 = nvvm.redux.sync fmin %value, %offset {NaN = true}: f32 -> f32 + %r3 = nvvm.redux.sync fmin %value, %offset nan = true : f32 -> f32 // CHECK: nvvm.redux.sync fmin %{{.*}} - %r4 = nvvm.redux.sync fmin %value, %offset {abs = true, NaN = true}: f32 -> f32 + %r4 = nvvm.redux.sync fmin %value, %offset abs = true nan = true : f32 -> f32 // CHECK: nvvm.redux.sync fmax %{{.*}} %r5 = nvvm.redux.sync fmax %value, %offset: f32 -> f32 // CHECK: nvvm.redux.sync fmax %{{.*}} - %r6 = nvvm.redux.sync fmax %value, %offset {abs = true}: f32 -> f32 + %r6 = nvvm.redux.sync fmax %value, %offset abs = true : f32 -> f32 // CHECK: nvvm.redux.sync fmax %{{.*}} - %r7 = nvvm.redux.sync fmax %value, %offset {NaN = true}: f32 -> f32 + %r7 = nvvm.redux.sync fmax %value, %offset nan = true : f32 -> f32 // CHECK: nvvm.redux.sync fmax %{{.*}} - %r8 = nvvm.redux.sync fmax %value, %offset {abs = true, NaN = true}: f32 -> f32 + %r8 = nvvm.redux.sync fmax %value, %offset abs = true nan = true : f32 -> f32 llvm.return %r1 : f32 } @@ -542,10 +542,10 @@ func.func @dot_accumulate_4way(%a_vec: vector<4xi8>, %b_vec: vector<4xi8>, %c: i // CHECK-LABEL: @dot_accumulate_2way func.func @dot_accumulate_2way(%a_vec: vector<2xi16>, %b_vec: vector<4xi8>, %c: i32) { - // CHECK: nvvm.dot.accumulate.2way %{{.*}}, %{{.*}}, %{{.*}} {b_hi = false} : vector<2xi16>, vector<4xi8> - %1 = nvvm.dot.accumulate.2way %a_vec , %b_vec , %c {b_hi = false}: vector<2xi16>, vector<4xi8> - // CHECK: nvvm.dot.accumulate.2way %{{.*}}, %{{.*}}, %{{.*}} {b_hi = true} : vector<2xi16>, vector<4xi8> - %3 = nvvm.dot.accumulate.2way %a_vec , %b_vec , %c {b_hi = true}: vector<2xi16>, vector<4xi8> + // CHECK: nvvm.dot.accumulate.2way %{{.*}}, %{{.*}}, %{{.*}} <{b_hi = false}> : vector<2xi16>, vector<4xi8> + %1 = nvvm.dot.accumulate.2way %a_vec , %b_vec , %c <{b_hi = false}>: vector<2xi16>, vector<4xi8> + // CHECK: nvvm.dot.accumulate.2way %{{.*}}, %{{.*}}, %{{.*}} <{b_hi = true}> : vector<2xi16>, vector<4xi8> + %3 = nvvm.dot.accumulate.2way %a_vec , %b_vec , %c <{b_hi = true}>: vector<2xi16>, vector<4xi8> return } diff --git a/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir b/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir index 3e763a036f2d3..816e26024c4d8 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir @@ -49,7 +49,7 @@ llvm.func @invalid_dst_type_f4x4_f6(%src : vector<4xf32>, %rbits : i32) -> i16 { // Test invalid rounding modes for non-stochastic ops llvm.func @convert_float_to_tf32_rs_not_supported(%src : f32) -> i32 { // expected-error @below {{Only {rn,rz,rna} rounding modes supported for ConvertFloatToTF32Op.}} - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode}> llvm.return %res : i32 } @@ -57,7 +57,7 @@ llvm.func @convert_float_to_tf32_rs_not_supported(%src : f32) -> i32 { llvm.func @convert_f32x2_to_f8x2_rs_not_supported(%a : f32, %b : f32) { // expected-error @below {{Only RN rounding mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}} - %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E4M3FN) + %res = nvvm.convert.f32x2.to.f8x2 %a, %b <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E4M3FN) llvm.return } @@ -65,6 +65,6 @@ llvm.func @convert_f32x2_to_f8x2_rs_not_supported(%a : f32, %b : f32) { llvm.func @convert_bf16x2_to_f8x2_rs_not_supported(%src : vector<2xbf16>) { // expected-error @below {{Only RZ and RP rounding modes are supported for conversions from bf16x2 to 'f8E8M0FNU' type}} - %res = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> -> i16 (f8E8M0FNU) + %res = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> -> i16 (f8E8M0FNU) llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/nvvm_check_target_sm.mlir b/mlir/test/Dialect/LLVMIR/nvvm_check_target_sm.mlir index ff90ad47ba410..794ead73a32ee 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm_check_target_sm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm_check_target_sm.mlir @@ -73,7 +73,7 @@ gpu.module @tcgen05_commit_sm100 [#nvvm.target] { gpu.module @tcgen05_cp_sm90a [#nvvm.target] { func.func @tcgen05_cp_sm90a(%taddr: !llvm.ptr<6>, %sdesc: i64) { // expected-error @below {{'nvvm.tcgen05.cp' op is not supported on sm_90a}} - nvvm.tcgen05.cp %taddr, %sdesc {shape = #nvvm.tcgen05_cp_shape} + nvvm.tcgen05.cp %taddr, %sdesc <{shape = #nvvm.tcgen05_cp_shape}> return } } @@ -83,7 +83,7 @@ gpu.module @tcgen05_cp_sm90a [#nvvm.target] { gpu.module @tcgen05_ld_sm90 [#nvvm.target] { func.func @tcgen05_ld_sm90(%taddr: !llvm.ptr<6>) { // expected-error @below {{'nvvm.tcgen05.ld' op is not supported on sm_90}} - %0 = nvvm.tcgen05.ld %taddr {shape = #nvvm.tcgen05_ldst_shape} : i32 + %0 = nvvm.tcgen05.ld %taddr <{shape = #nvvm.tcgen05_ldst_shape}> : i32 return } } @@ -93,7 +93,7 @@ gpu.module @tcgen05_ld_sm90 [#nvvm.target] { gpu.module @tcgen05_st_sm120f [#nvvm.target] { func.func @tcgen05_st_sm120f(%taddr: !llvm.ptr<6>, %val: i32) { // expected-error @below {{'nvvm.tcgen05.st' op is not supported on sm_120f}} - nvvm.tcgen05.st %taddr, %val {shape = #nvvm.tcgen05_ldst_shape} : i32 + nvvm.tcgen05.st %taddr, %val <{shape = #nvvm.tcgen05_ldst_shape}> : i32 return } } @@ -103,7 +103,7 @@ gpu.module @tcgen05_st_sm120f [#nvvm.target] { gpu.module @tcgen05_mma_sm90 [#nvvm.target] { func.func @tcgen05_mma_sm90(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1) { // expected-error @below {{'nvvm.tcgen05.mma' op is not supported on sm_90}} - nvvm.tcgen05.mma %d, %a, %b, %idesc, %eid {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + nvvm.tcgen05.mma %d, %a, %b, %idesc, %eid <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) return } } @@ -113,7 +113,7 @@ gpu.module @tcgen05_mma_sm90 [#nvvm.target] { gpu.module @tcgen05_mma_sp_sm100 [#nvvm.target] { func.func @tcgen05_mma_sp_sm100(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1, %sp: !llvm.ptr<6>) { // expected-error @below {{'nvvm.tcgen05.mma.sp' op is not supported on sm_100}} - nvvm.tcgen05.mma.sp %d, %a, %b, %idesc, %eid, %sp {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + nvvm.tcgen05.mma.sp %d, %a, %b, %idesc, %eid, %sp <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) return } } @@ -123,7 +123,7 @@ gpu.module @tcgen05_mma_sp_sm100 [#nvvm.target] { gpu.module @tcgen05_mma_block_scale_sm90a [#nvvm.target] { func.func @tcgen05_mma_block_scale_sm90a(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1, %sa: !llvm.ptr<6>, %sb: !llvm.ptr<6>) { // expected-error @below {{'nvvm.tcgen05.mma.block_scale' op is not supported on sm_90a}} - nvvm.tcgen05.mma.block_scale %d, %a, %b, %idesc, %eid, %sa, %sb {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + nvvm.tcgen05.mma.block_scale %d, %a, %b, %idesc, %eid, %sa, %sb <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) return } } @@ -133,7 +133,7 @@ gpu.module @tcgen05_mma_block_scale_sm90a [#nvvm.target] { gpu.module @tcgen05_mma_sp_block_scale_sm90 [#nvvm.target] { func.func @tcgen05_mma_sp_block_scale_sm90(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1, %sp: !llvm.ptr<6>, %sa: !llvm.ptr<6>, %sb: !llvm.ptr<6>) { // expected-error @below {{'nvvm.tcgen05.mma.sp.block_scale' op is not supported on sm_90}} - nvvm.tcgen05.mma.sp.block_scale %d, %a, %b, %idesc, %eid, %sp, %sa, %sb {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + nvvm.tcgen05.mma.sp.block_scale %d, %a, %b, %idesc, %eid, %sp, %sa, %sb <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) return } } @@ -143,7 +143,7 @@ gpu.module @tcgen05_mma_sp_block_scale_sm90 [#nvvm.target] { gpu.module @tcgen05_mma_ws_sm120f [#nvvm.target] { func.func @tcgen05_mma_ws_sm120f(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1) { // expected-error @below {{'nvvm.tcgen05.mma.ws' op is not supported on sm_120f}} - nvvm.tcgen05.mma.ws %d, %a, %b, %idesc, %eid {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1) + nvvm.tcgen05.mma.ws %d, %a, %b, %idesc, %eid <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1) return } } @@ -153,7 +153,7 @@ gpu.module @tcgen05_mma_ws_sm120f [#nvvm.target] { gpu.module @tcgen05_mma_ws_sp_sm90a [#nvvm.target] { func.func @tcgen05_mma_ws_sp_sm90a(%d: !llvm.ptr<6>, %a: i64, %b: i64, %idesc: i32, %eid: i1, %sp: !llvm.ptr<6>) { // expected-error @below {{'nvvm.tcgen05.mma.ws.sp' op is not supported on sm_90a}} - nvvm.tcgen05.mma.ws.sp %d, %a, %b, %idesc, %eid, %sp {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + nvvm.tcgen05.mma.ws.sp %d, %a, %b, %idesc, %eid, %sp <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) return } } @@ -193,7 +193,7 @@ gpu.module @tcgen05_shift_sm100 [#nvvm.target] { gpu.module @tcgen05_ld_red_sm100a [#nvvm.target] { func.func @tcgen05_ld_red_sm100a(%addr: !llvm.ptr<6>) { // expected-error @below {{'nvvm.tcgen05.ld.red' op is not supported on sm_100a}} - %data, %rv = nvvm.tcgen05.ld.red min %addr {shape = #nvvm.tcgen05_ldst_shape} : vector<2xi32>, i32 + %data, %rv = nvvm.tcgen05.ld.red min %addr <{shape = #nvvm.tcgen05_ldst_shape}> : vector<2xi32>, i32 return } } @@ -203,7 +203,7 @@ gpu.module @tcgen05_ld_red_sm100a [#nvvm.target] { gpu.module @tcgen05_ld_red_sm90a [#nvvm.target] { func.func @tcgen05_ld_red_sm90a(%addr: !llvm.ptr<6>) { // expected-error @below {{'nvvm.tcgen05.ld.red' op is not supported on sm_90a}} - %data, %rv = nvvm.tcgen05.ld.red min %addr {shape = #nvvm.tcgen05_ldst_shape} : vector<2xi32>, i32 + %data, %rv = nvvm.tcgen05.ld.red min %addr <{shape = #nvvm.tcgen05_ldst_shape}> : vector<2xi32>, i32 return } } diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf.mlir index fd05c85ae441f..0d28e282bcd27 100644 --- a/mlir/test/Target/LLVMIR/nvvm/addf/addf.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf.mlir @@ -10,9 +10,9 @@ llvm.func @fadd_f16_f16(%a : f16, %b : f16) -> f16 { // CHECK-NEXT: ret half %6 // CHECK-NEXT: } %f1 = nvvm.addf %a, %b : f16 - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : f16 - %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f16 - %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f16 + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : f16 + %f3 = nvvm.addf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f16 + %f4 = nvvm.addf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f16 llvm.return %f4 : f16 } @@ -24,7 +24,7 @@ llvm.func @fadd_bf16_bf16(%a : bf16, %b : bf16) -> bf16 { // CHECK-NEXT: ret bfloat %4 // CHECK-NEXT: } %f1 = nvvm.addf %a, %b : bf16 - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : bf16 + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : bf16 llvm.return %f2 : bf16 } @@ -51,22 +51,22 @@ llvm.func @fadd_f32_f32(%a : f32, %b : f32) -> f32 { // CHECK-NEXT: ret float %19 // CHECK-NEXT: } %f1 = nvvm.addf %a, %b : f32 - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : f32 - %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 - %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 - %f6 = nvvm.addf %f5, %f5 {rnd = #nvvm.fp_rnd_mode} : f32 - %f7 = nvvm.addf %f6, %f6 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f8 = nvvm.addf %f7, %f7 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 - %f9 = nvvm.addf %f8, %f8 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 - %f10 = nvvm.addf %f9, %f9 {rnd = #nvvm.fp_rnd_mode} : f32 - %f11 = nvvm.addf %f10, %f10 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f12 = nvvm.addf %f11, %f11 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 - %f13 = nvvm.addf %f12, %f12 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 - %f14 = nvvm.addf %f13, %f13 {rnd = #nvvm.fp_rnd_mode} : f32 - %f15 = nvvm.addf %f14, %f14 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f16 = nvvm.addf %f15, %f15 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 - %f17 = nvvm.addf %f16, %f16 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f3 = nvvm.addf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f4 = nvvm.addf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : f32 + %f5 = nvvm.addf %f4, %f4 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f32 + %f6 = nvvm.addf %f5, %f5 <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f7 = nvvm.addf %f6, %f6 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f8 = nvvm.addf %f7, %f7 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : f32 + %f9 = nvvm.addf %f8, %f8 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f32 + %f10 = nvvm.addf %f9, %f9 <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f11 = nvvm.addf %f10, %f10 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f12 = nvvm.addf %f11, %f11 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : f32 + %f13 = nvvm.addf %f12, %f12 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f32 + %f14 = nvvm.addf %f13, %f13 <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f15 = nvvm.addf %f14, %f14 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f16 = nvvm.addf %f15, %f15 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : f32 + %f17 = nvvm.addf %f16, %f16 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f32 llvm.return %f17 : f32 } @@ -81,9 +81,9 @@ llvm.func @fadd_f64_f64(%a : f64, %b : f64) -> f64 { // CHECK-NEXT: ret double %7 // CHECK-NEXT: } %f1 = nvvm.addf %a, %b : f64 - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : f64 - %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode} : f64 - %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode} : f64 - %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode} : f64 + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : f64 + %f3 = nvvm.addf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode}> : f64 + %f4 = nvvm.addf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode}> : f64 + %f5 = nvvm.addf %f4, %f4 <{rnd = #nvvm.fp_rnd_mode}> : f64 llvm.return %f5 : f64 } diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir index 23ba79ee3d8af..e3b6ef56312d0 100644 --- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir @@ -4,7 +4,7 @@ llvm.func @addf_invalid_sat_mode(%a : f16, %b : f16) -> f16 { // expected-error@+1 {{ attribute 'sat' failed to satisfy constraint: Describes the saturation mode whose value is one of {none, sat}}} - %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode} : f16 + %f1 = nvvm.addf %a, %b <{sat = #nvvm.sat_mode}> : f16 llvm.return %f1 : f16 } @@ -12,7 +12,7 @@ llvm.func @addf_invalid_sat_mode(%a : f16, %b : f16) -> f16 { llvm.func @addf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 { // expected-error@+1 {{FTZ and saturation are not supported for additions/subtractions involving f64 type}} - %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode, ftz=true} : f64 + %f1 = nvvm.addf %a, %b <{sat = #nvvm.sat_mode, ftz=true}> : f64 llvm.return %f1 : f64 } @@ -20,7 +20,7 @@ llvm.func @addf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 { llvm.func @addf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 { // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}} - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : f16 + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : f16 llvm.return %f1 : f16 } @@ -28,7 +28,7 @@ llvm.func @addf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 { llvm.func @addf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> { // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}} - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> llvm.return %f1 : vector<2xf16> } @@ -36,7 +36,7 @@ llvm.func @addf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) - llvm.func @addf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 { // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}} - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : bf16 + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : bf16 llvm.return %f1 : bf16 } @@ -44,7 +44,7 @@ llvm.func @addf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 { llvm.func @addf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> { // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}} - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return %f1 : vector<2xbf16> } @@ -52,7 +52,7 @@ llvm.func @addf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16> llvm.func @addf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 { // expected-error@+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions/subtractions}} - %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode, ftz=true} : bf16 + %f1 = nvvm.addf %a, %b <{sat = #nvvm.sat_mode, ftz=true}> : bf16 llvm.return %f1 : bf16 } @@ -62,6 +62,6 @@ llvm.func @addf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 { // available. llvm.func @addf_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 { // expected-error@+1 {{FTZ with no saturation is not supported for f16 and vector<2xf16> additions/subtractions}} - %f1 = nvvm.addf %a, %b {ftz=true} : f16 + %f1 = nvvm.addf %a, %b <{ftz=true}> : f16 llvm.return %f1 : f16 } diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector.mlir index b472de739c92a..d1bd352f202f9 100644 --- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector.mlir @@ -10,9 +10,9 @@ llvm.func @addf_vector_f16_f16(%a : vector<2xf16>, %b : vector<2xf16>) -> vector // CHECK-NEXT: ret <2 x half> %3 // CHECK-NEXT: } %f1 = nvvm.addf %a, %b : vector<2xf16> - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> - %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf16> - %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf16> + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> + %f3 = nvvm.addf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf16> + %f4 = nvvm.addf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf16> llvm.return %f1 : vector<2xf16> } @@ -24,7 +24,7 @@ llvm.func @addf_vector_bf16_bf16(%a : vector<2xbf16>, %b : vector<2xbf16>) -> ve // CHECK-NEXT: ret <2 x bfloat> %4 // CHECK-NEXT: } %f1 = nvvm.addf %a, %b : vector<2xbf16> - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return %f2 : vector<2xbf16> } @@ -74,10 +74,10 @@ llvm.func @addf_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vec // CHECK-NEXT: ret <2 x float> %34 // CHECK-NEXT: } %f1 = nvvm.addf %a, %b : vector<2xf32> - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> - %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f3 = nvvm.addf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f4 = nvvm.addf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : vector<2xf32> + %f5 = nvvm.addf %f4, %f4 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf32> llvm.return %f4 : vector<2xf32> } @@ -117,10 +117,10 @@ llvm.func @addf_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vec // CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1 // CHECK-NEXT: ret <2 x float> %34 // CHECK-NEXT: } - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> - %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.addf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : vector<2xf32> + %f4 = nvvm.addf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf32> llvm.return %f4 : vector<2xf32> } @@ -160,10 +160,10 @@ llvm.func @addf_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vec // CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1 // CHECK-NEXT: ret <2 x float> %34 // CHECK-NEXT: } - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> - %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.addf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : vector<2xf32> + %f4 = nvvm.addf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf32> llvm.return %f4 : vector<2xf32> } @@ -203,10 +203,10 @@ llvm.func @addf_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vec // CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1 // CHECK-NEXT: ret <2 x float> %34 // CHECK-NEXT: } - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> - %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.addf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : vector<2xf32> + %f4 = nvvm.addf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf32> llvm.return %f4 : vector<2xf32> } @@ -232,7 +232,7 @@ llvm.func @addf_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vec // CHECK-NEXT: ret <2 x double> %18 // CHECK-NEXT: } %f1 = nvvm.addf %a, %b : vector<2xf64> - %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + %f2 = nvvm.addf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> llvm.return %f2 : vector<2xf64> } @@ -248,7 +248,7 @@ llvm.func @addf_vector_f64_f64_rm(%a : vector<2xf64>, %b : vector<2xf64>) -> vec // CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1 // CHECK-NEXT: ret <2 x double> %10 // CHECK-NEXT: } - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> llvm.return %f1 : vector<2xf64> } @@ -264,7 +264,7 @@ llvm.func @addf_vector_f64_f64_rp(%a : vector<2xf64>, %b : vector<2xf64>) -> vec // CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1 // CHECK-NEXT: ret <2 x double> %10 // CHECK-NEXT: } - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> llvm.return %f1 : vector<2xf64> } @@ -280,6 +280,6 @@ llvm.func @addf_vector_f64_f64_rz(%a : vector<2xf64>, %b : vector<2xf64>) -> vec // CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1 // CHECK-NEXT: ret <2 x double> %10 // CHECK-NEXT: } - %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + %f1 = nvvm.addf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> llvm.return %f1 : vector<2xf64> } diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp16x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp16x2.mlir index a4bece83f832a..efa3d2ba34210 100644 --- a/mlir/test/Target/LLVMIR/nvvm/convert_fp16x2.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp16x2.mlir @@ -3,13 +3,13 @@ // CHECK-LABEL: @convert_f32x2_to_f16x2_rn llvm.func @convert_f32x2_to_f16x2_rn(%srcA : f32, %srcB : f32) { // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rn(float %{{.*}}, float %{{.*}}) - %res1 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %res1 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rn.satfinite(float %{{.*}}, float %{{.*}}) - %res2 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf16> + %res2 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf16> // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu(float %{{.*}}, float %{{.*}}) - %res3 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, relu = true} : vector<2xf16> + %res3 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, relu = true}> : vector<2xf16> // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}}) - %res4 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, relu = true, sat = #nvvm.sat_mode} : vector<2xf16> + %res4 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, relu = true, sat = #nvvm.sat_mode}> : vector<2xf16> llvm.return } @@ -17,13 +17,13 @@ llvm.func @convert_f32x2_to_f16x2_rn(%srcA : f32, %srcB : f32) { // CHECK-LABEL: @convert_f32x2_to_f16x2_rz llvm.func @convert_f32x2_to_f16x2_rz(%srcA : f32, %srcB : f32) { // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rz(float %{{.*}}, float %{{.*}}) - %res1 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %res1 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rz.satfinite(float %{{.*}}, float %{{.*}}) - %res2 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf16> + %res2 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf16> // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float %{{.*}}, float %{{.*}}) - %res3 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, relu = true} : vector<2xf16> + %res3 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, relu = true}> : vector<2xf16> // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu.satfinite(float %{{.*}}, float %{{.*}}) - %res4 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, relu = true, sat = #nvvm.sat_mode} : vector<2xf16> + %res4 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, relu = true, sat = #nvvm.sat_mode}> : vector<2xf16> llvm.return } @@ -31,13 +31,13 @@ llvm.func @convert_f32x2_to_f16x2_rz(%srcA : f32, %srcB : f32) { // CHECK-LABEL: @convert_f32x2_to_f16x2_rs_stochastic llvm.func @convert_f32x2_to_f16x2_rs_stochastic(%srcA : f32, %srcB : f32, %rbits : i32) { // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) - %res1 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %res1 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) - %res2 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {relu = true, rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %res2 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits <{relu = true, rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) - %res3 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf16> + %res3 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf16> // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) - %res4 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf16> + %res4 = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits <{relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf16> llvm.return } @@ -47,13 +47,13 @@ llvm.func @convert_f32x2_to_f16x2_rs_stochastic(%srcA : f32, %srcB : f32, %rbits // CHECK-LABEL: @convert_f32x2_to_bf16x2_rn llvm.func @convert_f32x2_to_bf16x2_rn(%srcA : f32, %srcB : f32) { // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn(float %{{.*}}, float %{{.*}}) - %res1 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %res1 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float %{{.*}}, float %{{.*}}) - %res2 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> + %res2 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu(float %{{.*}}, float %{{.*}}) - %res3 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, relu = true} : vector<2xbf16> + %res3 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, relu = true}> : vector<2xbf16> // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}}) - %res4 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, relu = true, sat = #nvvm.sat_mode} : vector<2xbf16> + %res4 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, relu = true, sat = #nvvm.sat_mode}> : vector<2xbf16> llvm.return } @@ -61,13 +61,13 @@ llvm.func @convert_f32x2_to_bf16x2_rn(%srcA : f32, %srcB : f32) { // CHECK-LABEL: @convert_f32x2_to_bf16x2_rz llvm.func @convert_f32x2_to_bf16x2_rz(%srcA : f32, %srcB : f32) { // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz(float %{{.*}}, float %{{.*}}) - %res1 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %res1 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float %{{.*}}, float %{{.*}}) - %res2 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> + %res2 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float %{{.*}}, float %{{.*}}) - %res3 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, relu = true} : vector<2xbf16> + %res3 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, relu = true}> : vector<2xbf16> // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float %{{.*}}, float %{{.*}}) - %res4 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, relu = true, sat = #nvvm.sat_mode} : vector<2xbf16> + %res4 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, relu = true, sat = #nvvm.sat_mode}> : vector<2xbf16> llvm.return } @@ -75,13 +75,13 @@ llvm.func @convert_f32x2_to_bf16x2_rz(%srcA : f32, %srcB : f32) { // CHECK-LABEL: @convert_f32x2_to_bf16x2_rs_stochastic llvm.func @convert_f32x2_to_bf16x2_rs_stochastic(%srcA : f32, %srcB : f32, %rbits : i32) { // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) - %res1 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %res1 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) - %res2 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {relu = true, rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %res2 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits <{relu = true, rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) - %res3 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> + %res3 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) - %res4 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> + %res4 = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits <{relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir index 3d3bd714fa8fa..f0fe51eb26fd8 100644 --- a/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir @@ -7,7 +7,7 @@ llvm.func @convert_f32x2_to_f4x2_e2m1(%srcA : f32, %srcB : f32) { %res1 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB : i8 (f4E2M1FN) // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}}) // CHECK-NEXT: %{{.*}} = trunc i16 %[[res2]] to i8 - %res2 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB {relu = true} : i8 (f4E2M1FN) + %res2 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB <{relu = true}> : i8 (f4E2M1FN) llvm.return } @@ -20,7 +20,7 @@ llvm.func @convert_f16x2_to_f4x2(%srcA : vector<2xf16>) { %res1 = nvvm.convert.f16x2.to.f4x2 %srcA : vector<2xf16> -> i8 (f4E2M1FN) // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.f16x2.to.e2m1x2.rn.relu.satfinite(<2 x half> %{{.*}}) // CHECK-NEXT: %{{.*}} = trunc i16 %[[res2]] to i8 - %res2 = nvvm.convert.f16x2.to.f4x2 %srcA {relu = true} : vector<2xf16> -> i8 (f4E2M1FN) + %res2 = nvvm.convert.f16x2.to.f4x2 %srcA <{relu = true}> : vector<2xf16> -> i8 (f4E2M1FN) llvm.return } @@ -33,7 +33,7 @@ llvm.func @convert_bf16x2_to_f4x2(%srcA : vector<2xbf16>) { %res1 = nvvm.convert.bf16x2.to.f4x2 %srcA : vector<2xbf16> -> i8 (f4E2M1FN) // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.bf16x2.to.e2m1x2.rn.relu.satfinite(<2 x bfloat> %{{.*}}) // CHECK-NEXT: %{{.*}} = trunc i16 %[[res2]] to i8 - %res2 = nvvm.convert.bf16x2.to.f4x2 %srcA {relu = true} : vector<2xbf16> -> i8 (f4E2M1FN) + %res2 = nvvm.convert.bf16x2.to.f4x2 %srcA <{relu = true}> : vector<2xbf16> -> i8 (f4E2M1FN) llvm.return } @@ -46,6 +46,6 @@ llvm.func @convert_f4x2_to_f16x2(%src : i8) { %res1 = nvvm.convert.f4x2.to.f16x2 %src : i8 (f4E2M1FN)-> vector<2xf16> // CHECK: %[[res2:.*]] = zext i8 %{{.*}} to i16 // CHECK-NEXT: %{{.*}} = call <2 x half> @llvm.nvvm.e2m1x2.to.f16x2.rn.relu(i16 %[[res2]]) - %res2 = nvvm.convert.f4x2.to.f16x2 %src {relu = true} : i8 (f4E2M1FN)-> vector<2xf16> + %res2 = nvvm.convert.f4x2.to.f16x2 %src <{relu = true}> : i8 (f4E2M1FN)-> vector<2xf16> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp6x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp6x2.mlir index 8d9e5ff2a6a82..8d9d82b9a6f7a 100644 --- a/mlir/test/Target/LLVMIR/nvvm/convert_fp6x2.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp6x2.mlir @@ -5,7 +5,7 @@ llvm.func @convert_f32x2_to_fp6x2_e2m3(%srcA : f32, %srcB : f32) { //CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e2m3x2.rn.satfinite(float %{{.*}}, float %{{.*}}) %res1 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB : i16 (f6E2M3FN) //CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e2m3x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}}) - %res2 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB {relu = true} : i16 (f6E2M3FN) + %res2 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB <{relu = true}> : i16 (f6E2M3FN) llvm.return } @@ -14,7 +14,7 @@ llvm.func @convert_f32x2_to_fp6x2_e3m2(%srcA : f32, %srcB : f32) { //CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e3m2x2.rn.satfinite(float %{{.*}}, float %{{.*}}) %res1 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB : i16 (f6E3M2FN) //CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e3m2x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}}) - %res2 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB {relu = true} : i16 (f6E3M2FN) + %res2 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB <{relu = true}> : i16 (f6E3M2FN) llvm.return } @@ -36,7 +36,7 @@ llvm.func @convert_f16x2_to_fp6x2_e2m3(%srcA : vector<2xf16>) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e2m3x2.rn.satfinite(<2 x half> %{{.*}}) %res1 = nvvm.convert.f16x2.to.f6x2 %srcA : vector<2xf16> -> i16 (f6E2M3FN) // CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e2m3x2.rn.relu.satfinite(<2 x half> %{{.*}}) - %res2 = nvvm.convert.f16x2.to.f6x2 %srcA {relu = true} : vector<2xf16> -> i16 (f6E2M3FN) + %res2 = nvvm.convert.f16x2.to.f6x2 %srcA <{relu = true}> : vector<2xf16> -> i16 (f6E2M3FN) llvm.return } @@ -45,7 +45,7 @@ llvm.func @convert_f16x2_to_fp6x2_e3m2(%srcA : vector<2xf16>) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e3m2x2.rn.satfinite(<2 x half> %{{.*}}) %res1 = nvvm.convert.f16x2.to.f6x2 %srcA : vector<2xf16> -> i16 (f6E3M2FN) // CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e3m2x2.rn.relu.satfinite(<2 x half> %{{.*}}) - %res2 = nvvm.convert.f16x2.to.f6x2 %srcA {relu = true} : vector<2xf16> -> i16 (f6E3M2FN) + %res2 = nvvm.convert.f16x2.to.f6x2 %srcA <{relu = true}> : vector<2xf16> -> i16 (f6E3M2FN) llvm.return } @@ -67,7 +67,7 @@ llvm.func @convert_bf16x2_to_fp6x2_e2m3(%srcA : vector<2xbf16>, %scale_factor : // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.e2m3x2.rn.satfinite(<2 x bfloat> %{{.*}}) %res1 = nvvm.convert.bf16x2.to.f6x2 %srcA : vector<2xbf16> -> i16 (f6E2M3FN) // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.e2m3x2.rn.relu.satfinite(<2 x bfloat> %{{.*}}) - %res2 = nvvm.convert.bf16x2.to.f6x2 %srcA {relu = true} : vector<2xbf16> -> i16 (f6E2M3FN) + %res2 = nvvm.convert.bf16x2.to.f6x2 %srcA <{relu = true}> : vector<2xbf16> -> i16 (f6E2M3FN) llvm.return } @@ -76,7 +76,7 @@ llvm.func @convert_bf16x2_to_fp6x2_e3m2(%srcA : vector<2xbf16>, %scale_factor : // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.e3m2x2.rn.satfinite(<2 x bfloat> %{{.*}}) %res1 = nvvm.convert.bf16x2.to.f6x2 %srcA : vector<2xbf16> -> i16 (f6E3M2FN) // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.e3m2x2.rn.relu.satfinite(<2 x bfloat> %{{.*}}) - %res2 = nvvm.convert.bf16x2.to.f6x2 %srcA {relu = true} : vector<2xbf16> -> i16 (f6E3M2FN) + %res2 = nvvm.convert.bf16x2.to.f6x2 %srcA <{relu = true}> : vector<2xbf16> -> i16 (f6E3M2FN) llvm.return } @@ -100,7 +100,7 @@ llvm.func @convert_f6x2_to_f16x2_e2m3(%src : vector<2xi8>) { %res1 = nvvm.convert.f6x2.to.f16x2 %src : vector<2xi8> (f6E2M3FN)-> vector<2xf16> // CHECK: %[[res2:.*]] = bitcast <2 x i8> %{{.*}} to i16 // CHECK-NEXT: %{{.*}} = call <2 x half> @llvm.nvvm.e2m3x2.to.f16x2.rn.relu(i16 %[[res2]]) - %res2 = nvvm.convert.f6x2.to.f16x2 %src {relu = true} : vector<2xi8> (f6E2M3FN)-> vector<2xf16> + %res2 = nvvm.convert.f6x2.to.f16x2 %src <{relu = true}> : vector<2xi8> (f6E2M3FN)-> vector<2xf16> llvm.return } @@ -111,6 +111,6 @@ llvm.func @convert_f6x2_to_f16x2_e3m2(%src : vector<2xi8>) { %res1 = nvvm.convert.f6x2.to.f16x2 %src : vector<2xi8> (f6E3M2FN)-> vector<2xf16> // CHECK: %[[res2:.*]] = bitcast <2 x i8> %{{.*}} to i16 // CHECK-NEXT: %{{.*}} = call <2 x half> @llvm.nvvm.e3m2x2.to.f16x2.rn.relu(i16 %[[res2]]) - %res2 = nvvm.convert.f6x2.to.f16x2 %src {relu = true} : vector<2xi8> (f6E3M2FN)-> vector<2xf16> + %res2 = nvvm.convert.f6x2.to.f16x2 %src <{relu = true}> : vector<2xi8> (f6E3M2FN)-> vector<2xf16> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2.mlir index d8002d790b6a2..867d180076100 100644 --- a/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2.mlir @@ -5,31 +5,31 @@ // CHECK-LABEL: @convert_f32x2_to_f8x2_e4m3 llvm.func @convert_f32x2_to_f8x2_e4m3(%srcA : f32, %srcB : f32) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e4m3x2.rn(float %{{.*}}, float %{{.*}}) - %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E4M3FN) + %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E4M3FN) // CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e4m3x2.rn.relu(float %{{.*}}, float %{{.*}}) - %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E4M3FN) + %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E4M3FN) llvm.return } // CHECK-LABEL: @convert_f32x2_to_f8x2_e5m2 llvm.func @convert_f32x2_to_f8x2_e5m2(%srcA : f32, %srcB : f32) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e5m2x2.rn(float %{{.*}}, float %{{.*}}) - %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E5M2) + %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E5M2) // CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e5m2x2.rn.relu(float %{{.*}}, float %{{.*}}) - %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E5M2) + %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E5M2) llvm.return } // CHECK-LABEL: @convert_f32x2_to_f8x2_ue8m0 llvm.func @convert_f32x2_to_f8x2_ue8m0(%srcA : f32, %srcB : f32) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.ue8m0x2.rz(float %{{.*}}, float %{{.*}}) - %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode} : i16 (f8E8M0FNU) + %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode}> : i16 (f8E8M0FNU) // CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.ue8m0x2.rp(float %{{.*}}, float %{{.*}}) - %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode} : i16 (f8E8M0FNU) + %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode}> : i16 (f8E8M0FNU) // CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.ue8m0x2.rz.satfinite(float %{{.*}}, float %{{.*}}) - %res3 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E8M0FNU) + %res3 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E8M0FNU) // CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.ue8m0x2.rp.satfinite(float %{{.*}}, float %{{.*}}) - %res4 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E8M0FNU) + %res4 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E8M0FNU) llvm.return } @@ -37,10 +37,10 @@ llvm.func @convert_f32x2_to_f8x2_ue8m0(%srcA : f32, %srcB : f32) { llvm.func @convert_f32x2_to_f8x2_vector_return(%srcA : f32, %srcB : f32) { // CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.ff.to.e4m3x2.rn(float %{{.*}}, float %{{.*}}) // CHECK-NEXT: %{{.*}} = bitcast i16 %[[res1]] to <2 x i8> - %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xi8> (f8E4M3FN) + %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xi8> (f8E4M3FN) // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.ff.to.e4m3x2.rn.relu(float %{{.*}}, float %{{.*}}) // CHECK-NEXT: %{{.*}} = bitcast i16 %[[res2]] to <2 x i8> - %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xi8> (f8E4M3FN) + %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB <{relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xi8> (f8E4M3FN) llvm.return } @@ -51,7 +51,7 @@ llvm.func @convert_f16x2_to_f8x2_e4m3(%src : vector<2xf16>) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn(<2 x half> %{{.*}}) %res1 = nvvm.convert.f16x2.to.f8x2 %src : vector<2xf16> -> i16 (f8E4M3FN) // CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn.relu(<2 x half> %{{.*}}) - %res2 = nvvm.convert.f16x2.to.f8x2 %src {relu = true} : vector<2xf16> -> i16 (f8E4M3FN) + %res2 = nvvm.convert.f16x2.to.f8x2 %src <{relu = true}> : vector<2xf16> -> i16 (f8E4M3FN) llvm.return } @@ -60,7 +60,7 @@ llvm.func @convert_f16x2_to_f8x2_e5m2(%src : vector<2xf16>) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn(<2 x half> %{{.*}}) %res1 = nvvm.convert.f16x2.to.f8x2 %src : vector<2xf16> -> i16 (f8E5M2) // CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn.relu(<2 x half> %{{.*}}) - %res2 = nvvm.convert.f16x2.to.f8x2 %src {relu = true} : vector<2xf16> -> i16 (f8E5M2) + %res2 = nvvm.convert.f16x2.to.f8x2 %src <{relu = true}> : vector<2xf16> -> i16 (f8E5M2) llvm.return } @@ -80,13 +80,13 @@ llvm.func @convert_f16x2_to_f8x2_vector_return(%src : vector<2xf16>) { // CHECK-LABEL: @convert_bf16x2_to_f8x2_ue8m0 llvm.func @convert_bf16x2_to_f8x2_ue8m0(%src : vector<2xbf16>) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rz(<2 x bfloat> %{{.*}}) - %res1 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> -> i16 (f8E8M0FNU) + %res1 = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> -> i16 (f8E8M0FNU) // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rp(<2 x bfloat> %{{.*}}) - %res2 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> -> i16 (f8E8M0FNU) + %res2 = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> -> i16 (f8E8M0FNU) // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rz.satfinite(<2 x bfloat> %{{.*}}) - %res3 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> -> i16 (f8E8M0FNU) + %res3 = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> -> i16 (f8E8M0FNU) // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rp.satfinite(<2 x bfloat> %{{.*}}) - %res4 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> -> i16 (f8E8M0FNU) + %res4 = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> -> i16 (f8E8M0FNU) llvm.return } @@ -94,18 +94,18 @@ llvm.func @convert_bf16x2_to_f8x2_ue8m0(%src : vector<2xbf16>) { // CHECK-LABEL: @convert_bf16x2_to_f8x2_e4m3 llvm.func @convert_bf16x2_to_f8x2_e4m3(%srcA : vector<2xbf16>) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.e4m3x2.rn.satfinite(<2 x bfloat> %{{.*}}) - %res1 = nvvm.convert.bf16x2.to.f8x2 %srcA {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> -> i16 (f8E4M3FN) + %res1 = nvvm.convert.bf16x2.to.f8x2 %srcA <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> -> i16 (f8E4M3FN) // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.e4m3x2.rn.relu.satfinite(<2 x bfloat> %{{.*}}) - %res2 = nvvm.convert.bf16x2.to.f8x2 %srcA {relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> -> i16 (f8E4M3FN) + %res2 = nvvm.convert.bf16x2.to.f8x2 %srcA <{relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> -> i16 (f8E4M3FN) llvm.return } // CHECK-LABEL: @convert_bf16x2_to_f8x2_e5m2 llvm.func @convert_bf16x2_to_f8x2_e5m2(%srcA : vector<2xbf16>) { // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.e5m2x2.rn.satfinite(<2 x bfloat> %{{.*}}) - %res1 = nvvm.convert.bf16x2.to.f8x2 %srcA {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> -> vector<2xi8> (f8E5M2) + %res1 = nvvm.convert.bf16x2.to.f8x2 %srcA <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> -> vector<2xi8> (f8E5M2) // CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.e5m2x2.rn.relu.satfinite(<2 x bfloat> %{{.*}}) - %res2 = nvvm.convert.bf16x2.to.f8x2 %srcA {relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> -> vector<2xi8> (f8E5M2) + %res2 = nvvm.convert.bf16x2.to.f8x2 %srcA <{relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> -> vector<2xi8> (f8E5M2) llvm.return } @@ -113,16 +113,16 @@ llvm.func @convert_bf16x2_to_f8x2_e5m2(%srcA : vector<2xbf16>) { llvm.func @convert_bf16x2_to_f8x2_vector_return(%src : vector<2xbf16>) { // CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rz(<2 x bfloat> %{{.*}}) // CHECK-NEXT: %{{.*}} = bitcast i16 %[[res1]] to <2 x i8> - %res1 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU) + %res1 = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU) // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rp.satfinite(<2 x bfloat> %{{.*}}) // CHECK-NEXT: %{{.*}} = bitcast i16 %[[res2]] to <2 x i8> - %res2 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU) + %res2 = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU) // CHECK: %[[res3:.*]] = call i16 @llvm.nvvm.bf16x2.to.e4m3x2.rn.satfinite(<2 x bfloat> %{{.*}}) // CHECK-NEXT: %{{.*}} = bitcast i16 %[[res3]] to <2 x i8> - %res3 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> -> vector<2xi8> (f8E4M3FN) + %res3 = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> -> vector<2xi8> (f8E4M3FN) // CHECK: %[[res4:.*]] = call i16 @llvm.nvvm.bf16x2.to.e5m2x2.rn.relu.satfinite(<2 x bfloat> %{{.*}}) // CHECK-NEXT: %{{.*}} = bitcast i16 %[[res4]] to <2 x i8> - %res4 = nvvm.convert.bf16x2.to.f8x2 %src {relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xbf16> -> vector<2xi8> (f8E5M2) + %res4 = nvvm.convert.bf16x2.to.f8x2 %src <{relu = true, rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xbf16> -> vector<2xi8> (f8E5M2) llvm.return } @@ -135,7 +135,7 @@ llvm.func @convert_f8x2_to_f16x2_e4m3(%src : vector<2xi8>) { %res1 = nvvm.convert.f8x2.to.f16x2 %src : vector<2xi8> (f8E4M3FN)-> vector<2xf16> // CHECK: %[[res2:.*]] = bitcast <2 x i8> %{{.*}} to i16 // CHECK-NEXT: %{{.*}} = call <2 x half> @llvm.nvvm.e4m3x2.to.f16x2.rn.relu(i16 %[[res2]]) - %res2 = nvvm.convert.f8x2.to.f16x2 %src {relu = true} : vector<2xi8> (f8E4M3FN)-> vector<2xf16> + %res2 = nvvm.convert.f8x2.to.f16x2 %src <{relu = true}> : vector<2xi8> (f8E4M3FN)-> vector<2xf16> llvm.return } @@ -146,7 +146,7 @@ llvm.func @convert_f8x2_to_f16x2_e5m2(%src : vector<2xi8>) { %res1 = nvvm.convert.f8x2.to.f16x2 %src : vector<2xi8> (f8E5M2)-> vector<2xf16> // CHECK: %[[res2:.*]] = bitcast <2 x i8> %{{.*}} to i16 // CHECK-NEXT: %{{.*}} = call <2 x half> @llvm.nvvm.e5m2x2.to.f16x2.rn.relu(i16 %[[res2]]) - %res2 = nvvm.convert.f8x2.to.f16x2 %src {relu = true} : vector<2xi8> (f8E5M2)-> vector<2xf16> + %res2 = nvvm.convert.f8x2.to.f16x2 %src <{relu = true}> : vector<2xi8> (f8E5M2)-> vector<2xf16> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2_invalid.mlir index 747706dfc3418..6756862463047 100644 --- a/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2_invalid.mlir @@ -12,7 +12,7 @@ llvm.func @convert_bf16x2_to_f8x2_invalid_type(%src : vector<2xbf16>) { llvm.func @convert_bf16x2_to_f8x2_invalid_rounding_1(%src : vector<2xbf16>) { // expected-error @below {{Only RN rounding mode is supported for conversions from bf16x2 to 'f8E4M3FN' and 'f8E5M2' types}} - %res = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> -> vector<2xi8> (f8E4M3FN) + %res = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> -> vector<2xi8> (f8E4M3FN) llvm.return } @@ -20,7 +20,7 @@ llvm.func @convert_bf16x2_to_f8x2_invalid_rounding_1(%src : vector<2xbf16>) { llvm.func @nvvm_cvt_bf16x2_to_f8x2_invalid_rounding_2(%src : vector<2xbf16>) { // expected-error @below {{Only RZ and RP rounding modes are supported for conversions from bf16x2 to 'f8E8M0FNU' type}} - %res = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU) + %res = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU) llvm.return } @@ -28,7 +28,7 @@ llvm.func @nvvm_cvt_bf16x2_to_f8x2_invalid_rounding_2(%src : vector<2xbf16>) { llvm.func @convert_bf16x2_to_f8x2_invalid_sat_mode(%src : vector<2xbf16>) { // expected-error @below {{Only SATFINITE saturation mode is supported for conversions from bf16x2 to 'f8E4M3FN' and 'f8E5M2' types}} - %res = nvvm.convert.bf16x2.to.f8x2 %src {sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> -> vector<2xi8> (f8E4M3FN) + %res = nvvm.convert.bf16x2.to.f8x2 %src <{sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> -> vector<2xi8> (f8E4M3FN) llvm.return } @@ -36,6 +36,6 @@ llvm.func @convert_bf16x2_to_f8x2_invalid_sat_mode(%src : vector<2xbf16>) { llvm.func @convert_bf16x2_to_f8x2_invalid_relu(%src : vector<2xbf16>) { // expected-error @below {{relu not supported for conversions to 'f8E8M0FNU' type}} - %res = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode, relu = true} : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU) + %res = nvvm.convert.bf16x2.to.f8x2 %src <{rnd = #nvvm.fp_rnd_mode, relu = true}> : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_s2f6x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_s2f6x2.mlir index 7c1aa406a47af..bb1d308137f78 100644 --- a/mlir/test/Target/LLVMIR/nvvm/convert_s2f6x2.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/convert_s2f6x2.mlir @@ -8,7 +8,7 @@ llvm.func @convert_f32x2_to_s2f6x2(%srcA : f32, %srcB : f32) -> i16 { // CHECK-NEXT: ret i16 %5 // CHECK-NEXT: } %res1 = nvvm.convert.f32x2.to.s2f6x2 %srcA, %srcB : i16 - %res2 = nvvm.convert.f32x2.to.s2f6x2 %srcA, %srcB {relu = true} : i16 + %res2 = nvvm.convert.f32x2.to.s2f6x2 %srcA, %srcB <{relu = true}> : i16 // Combine results to avoid dead code elimination %final_result = llvm.or %res1, %res2 : i16 @@ -23,7 +23,7 @@ llvm.func @convert_f32x2_to_s2f6x2_scale(%srcA : f32, %srcB : f32, %scale : i16) // CHECK-NEXT: ret i16 %6 // CHECK-NEXT: } %res1 = nvvm.convert.f32x2.to.s2f6x2 %srcA, %srcB, %scale : i16 - %res2 = nvvm.convert.f32x2.to.s2f6x2 %srcA, %srcB, %scale {relu = true} : i16 + %res2 = nvvm.convert.f32x2.to.s2f6x2 %srcA, %srcB, %scale <{relu = true}> : i16 // Combine results to avoid dead code elimination %final_result = llvm.or %res1, %res2 : i16 @@ -58,7 +58,7 @@ llvm.func @convert_bf16x2_to_s2f6x2(%srcA : vector<2xbf16>) -> i16 { // CHECK-NEXT: ret i16 %4 // CHECK-NEXT: } %res1 = nvvm.convert.bf16x2.to.s2f6x2 %srcA : vector<2xbf16> -> i16 - %res2 = nvvm.convert.bf16x2.to.s2f6x2 %srcA {relu = true} : vector<2xbf16> -> i16 + %res2 = nvvm.convert.bf16x2.to.s2f6x2 %srcA <{relu = true}> : vector<2xbf16> -> i16 // Combine results to avoid dead code elimination %final_result = llvm.or %res1, %res2 : i16 @@ -73,7 +73,7 @@ llvm.func @convert_bf16x2_to_s2f6x2_scale(%srcA : vector<2xbf16>, %scale : i16) // CHECK-NEXT: ret i16 %5 // CHECK-NEXT: } %res1 = nvvm.convert.bf16x2.to.s2f6x2 %srcA, %scale : vector<2xbf16> -> i16 - %res2 = nvvm.convert.bf16x2.to.s2f6x2 %srcA, %scale {relu = true} : vector<2xbf16> -> i16 + %res2 = nvvm.convert.bf16x2.to.s2f6x2 %srcA, %scale <{relu = true}> : vector<2xbf16> -> i16 // Combine results to avoid dead code elimination %final_result = llvm.or %res1, %res2 : i16 @@ -118,7 +118,7 @@ llvm.func @convert_s2f6x2_to_bf16x2_relu(%src : vector<2xi8>) -> vector<2xbf16> // CHECK-NEXT: %3 = call <2 x bfloat> @llvm.nvvm.s2f6x2.to.bf16x2.rn.relu.scale.n2.ue8m0(i16 %2, i16 32639) // CHECK-NEXT: ret <2 x bfloat> %3 // CHECK-NEXT: } - %res = nvvm.convert.s2f6x2.to.bf16x2 %src {relu = true} : vector<2xi8> -> vector<2xbf16> + %res = nvvm.convert.s2f6x2.to.bf16x2 %src <{relu = true}> : vector<2xi8> -> vector<2xbf16> llvm.return %res : vector<2xbf16> } @@ -140,7 +140,7 @@ llvm.func @convert_s2f6x2_to_bf16x2_scale_relu(%src : vector<2xi8>, %scale : i16 // CHECK-NEXT: %4 = call <2 x bfloat> @llvm.nvvm.s2f6x2.to.bf16x2.rn.relu.scale.n2.ue8m0(i16 %3, i16 %1) // CHECK-NEXT: ret <2 x bfloat> %4 // CHECK-NEXT: } - %res = nvvm.convert.s2f6x2.to.bf16x2 %src, %scale {relu = true} : vector<2xi8> -> vector<2xbf16> + %res = nvvm.convert.s2f6x2.to.bf16x2 %src, %scale <{relu = true}> : vector<2xi8> -> vector<2xbf16> llvm.return %res : vector<2xbf16> } @@ -151,7 +151,7 @@ llvm.func @convert_s2f6x2_to_bf16x2_satfinite(%src : vector<2xi8>) -> vector<2xb // CHECK-NEXT: %3 = call <2 x bfloat> @llvm.nvvm.s2f6x2.to.bf16x2.rn.satfinite.scale.n2.ue8m0(i16 %2, i16 32639) // CHECK-NEXT: ret <2 x bfloat> %3 // CHECK-NEXT: } - %res = nvvm.convert.s2f6x2.to.bf16x2 %src {sat = #nvvm.sat_mode} : vector<2xi8> -> vector<2xbf16> + %res = nvvm.convert.s2f6x2.to.bf16x2 %src <{sat = #nvvm.sat_mode}> : vector<2xi8> -> vector<2xbf16> llvm.return %res : vector<2xbf16> } @@ -162,7 +162,7 @@ llvm.func @convert_s2f6x2_to_bf16x2_relu_satfinite(%src : vector<2xi8>) -> vecto // CHECK-NEXT: %3 = call <2 x bfloat> @llvm.nvvm.s2f6x2.to.bf16x2.rn.relu.satfinite.scale.n2.ue8m0(i16 %2, i16 32639) // CHECK-NEXT: ret <2 x bfloat> %3 // CHECK-NEXT: } - %res = nvvm.convert.s2f6x2.to.bf16x2 %src {relu = true, sat = #nvvm.sat_mode} : vector<2xi8> -> vector<2xbf16> + %res = nvvm.convert.s2f6x2.to.bf16x2 %src <{relu = true, sat = #nvvm.sat_mode}> : vector<2xi8> -> vector<2xbf16> llvm.return %res : vector<2xbf16> } @@ -173,7 +173,7 @@ llvm.func @convert_s2f6x2_to_bf16x2_scale_satfinite(%src : vector<2xi8>, %scale // CHECK-NEXT: %4 = call <2 x bfloat> @llvm.nvvm.s2f6x2.to.bf16x2.rn.satfinite.scale.n2.ue8m0(i16 %3, i16 %1) // CHECK-NEXT: ret <2 x bfloat> %4 // CHECK-NEXT: } - %res = nvvm.convert.s2f6x2.to.bf16x2 %src, %scale {sat = #nvvm.sat_mode} : vector<2xi8> -> vector<2xbf16> + %res = nvvm.convert.s2f6x2.to.bf16x2 %src, %scale <{sat = #nvvm.sat_mode}> : vector<2xi8> -> vector<2xbf16> llvm.return %res : vector<2xbf16> } @@ -184,6 +184,6 @@ llvm.func @convert_s2f6x2_to_bf16x2_scale_relu_satfinite(%src : vector<2xi8>, %s // CHECK-NEXT: %4 = call <2 x bfloat> @llvm.nvvm.s2f6x2.to.bf16x2.rn.relu.satfinite.scale.n2.ue8m0(i16 %3, i16 %1) // CHECK-NEXT: ret <2 x bfloat> %4 // CHECK-NEXT: } - %res = nvvm.convert.s2f6x2.to.bf16x2 %src, %scale {relu = true, sat = #nvvm.sat_mode} : vector<2xi8> -> vector<2xbf16> + %res = nvvm.convert.s2f6x2.to.bf16x2 %src, %scale <{relu = true, sat = #nvvm.sat_mode}> : vector<2xi8> -> vector<2xbf16> llvm.return %res : vector<2xbf16> } diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir index 03abcddd96cb0..d70d17259a1f1 100644 --- a/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir @@ -10,7 +10,7 @@ gpu.module @valid_f16x2_rs_sm_100a [#nvvm.target] { %f1 = llvm.mlir.constant(1.0 : f32) : f32 %f2 = llvm.mlir.constant(2.0 : f32) : f32 %rbits = llvm.mlir.constant(0x12345678 : i32) : i32 - %res = nvvm.convert.f32x2.to.f16x2 %f1, %f2, %rbits {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %res = nvvm.convert.f32x2.to.f16x2 %f1, %f2, %rbits <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> return } } @@ -21,7 +21,7 @@ gpu.module @valid_bf16x2_rs_sm_103a [#nvvm.target] { %f1 = llvm.mlir.constant(1.0 : f32) : f32 %f2 = llvm.mlir.constant(2.0 : f32) : f32 %rbits = llvm.mlir.constant(0 : i32) : i32 - %res = nvvm.convert.f32x2.to.bf16x2 %f1, %f2, %rbits {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %res = nvvm.convert.f32x2.to.bf16x2 %f1, %f2, %rbits <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> return } } @@ -40,7 +40,7 @@ llvm.func @convert_f32x4_to_f8x4_e4m3_rs(%src : vector<4xf32>, %rbits : i32) -> // CHECK-LABEL: @convert_f32x4_to_f8x4_e4m3_rs_relu llvm.func @convert_f32x4_to_f8x4_e4m3_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) - %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f8E4M3FN) + %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits <{relu = true}> : vector<4xf32> -> vector<4xi8> (f8E4M3FN) llvm.return %res : vector<4xi8> } @@ -58,7 +58,7 @@ llvm.func @convert_f32x4_to_f8x4_e5m2_rs(%src : vector<4xf32>, %rbits : i32) -> // CHECK-LABEL: @convert_f32x4_to_f8x4_e5m2_rs_relu llvm.func @convert_f32x4_to_f8x4_e5m2_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) - %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f8E5M2) + %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits <{relu = true}> : vector<4xf32> -> vector<4xi8> (f8E5M2) llvm.return %res : vector<4xi8> } @@ -76,7 +76,7 @@ llvm.func @convert_f32x4_to_f6x4_e2m3_rs(%src : vector<4xf32>, %rbits : i32) -> // CHECK-LABEL: @convert_f32x4_to_f6x4_e2m3_rs_relu llvm.func @convert_f32x4_to_f6x4_e2m3_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) - %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f6E2M3FN) + %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits <{relu = true}> : vector<4xf32> -> vector<4xi8> (f6E2M3FN) llvm.return %res : vector<4xi8> } @@ -94,7 +94,7 @@ llvm.func @convert_f32x4_to_f6x4_e3m2_rs(%src : vector<4xf32>, %rbits : i32) -> // CHECK-LABEL: @convert_f32x4_to_f6x4_e3m2_rs_relu llvm.func @convert_f32x4_to_f6x4_e3m2_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) - %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f6E3M2FN) + %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits <{relu = true}> : vector<4xf32> -> vector<4xi8> (f6E3M2FN) llvm.return %res : vector<4xi8> } @@ -112,7 +112,7 @@ llvm.func @convert_f32x4_to_f4x4_e2m1_rs(%src : vector<4xf32>, %rbits : i32) -> // CHECK-LABEL: @convert_f32x4_to_f4x4_e2m1_rs_relu llvm.func @convert_f32x4_to_f4x4_e2m1_rs_relu(%src : vector<4xf32>, %rbits : i32) -> i16 { // CHECK: %{{.*}} = call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) - %res = nvvm.convert.f32x4.to.f4x4 %src, %rbits {relu = true} : vector<4xf32> -> i16 (f4E2M1FN) + %res = nvvm.convert.f32x4.to.f4x4 %src, %rbits <{relu = true}> : vector<4xf32> -> i16 (f4E2M1FN) llvm.return %res : i16 } diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_tf32.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_tf32.mlir index e9db648e42041..fc90aae112b1f 100644 --- a/mlir/test/Target/LLVMIR/nvvm/convert_tf32.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/convert_tf32.mlir @@ -3,69 +3,69 @@ // CHECK-LABEL: @convert_float_to_tf32_rna llvm.func @convert_float_to_tf32_rna(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rna(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode}> llvm.return %res : i32 } // CHECK-LABEL: @convert_float_to_tf32_rna_sf llvm.func @convert_float_to_tf32_rna_sf(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rna.satfinite(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> llvm.return %res : i32 } // CHECK-LABEL: @convert_float_to_tf32_rn llvm.func @convert_float_to_tf32_rn(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rn(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode}> llvm.return %res : i32 } // CHECK-LABEL: @convert_float_to_tf32_rn_relu llvm.func @convert_float_to_tf32_rn_relu(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rn.relu(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, relu=true} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode, relu=true}> llvm.return %res : i32 } // CHECK-LABEL: @convert_float_to_tf32_rn_sf llvm.func @convert_float_to_tf32_rn_sf(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rn.satfinite(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> llvm.return %res : i32 } // CHECK-LABEL: @convert_float_to_tf32_rn_relu_sf llvm.func @convert_float_to_tf32_rn_relu_sf(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rn.relu.satfinite(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, relu=true, sat = #nvvm.sat_mode} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode, relu=true, sat = #nvvm.sat_mode}> llvm.return %res : i32 } // CHECK-LABEL: @convert_float_to_tf32_rz llvm.func @convert_float_to_tf32_rz(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rz(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode}> llvm.return %res : i32 } // CHECK-LABEL: @convert_float_to_tf32_rz_relu llvm.func @convert_float_to_tf32_rz_relu(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rz.relu(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, relu=true} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode, relu=true}> llvm.return %res : i32 } // CHECK-LABEL: @convert_float_to_tf32_rz_sf llvm.func @convert_float_to_tf32_rz_sf(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rz.satfinite(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> llvm.return %res : i32 } // CHECK-LABEL: @convert_float_to_tf32_rz_relu_sf llvm.func @convert_float_to_tf32_rz_relu_sf(%src : f32) -> i32 { // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rz.relu.satfinite(float %{{.*}}) - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, relu=true, sat = #nvvm.sat_mode} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode, relu=true, sat = #nvvm.sat_mode}> llvm.return %res : i32 } diff --git a/mlir/test/Target/LLVMIR/nvvm/fence-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/fence-invalid.mlir index d2e14637f094f..b577b9436cace 100644 --- a/mlir/test/Target/LLVMIR/nvvm/fence-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/fence-invalid.mlir @@ -2,7 +2,7 @@ llvm.func @fence_sync_restrict() { // expected-error @below {{attribute 'order' failed to satisfy constraint: NVVM Memory Ordering kind whose value is one of {acquire, release}}} - nvvm.fence.sync_restrict {order = #nvvm.mem_order} + nvvm.fence.sync_restrict #nvvm.mem_order llvm.return } @@ -10,7 +10,7 @@ llvm.func @fence_sync_restrict() { llvm.func @fence_sync_restrict() { // expected-error @below {{attribute 'order' failed to satisfy constraint: NVVM Memory Ordering kind whose value is one of {acquire, release}}} - nvvm.fence.sync_restrict {order = #nvvm.mem_order} + nvvm.fence.sync_restrict #nvvm.mem_order llvm.return } @@ -18,7 +18,7 @@ llvm.func @fence_sync_restrict() { llvm.func @fence_proxy() { // expected-error @below {{attribute 'kind' failed to satisfy constraint: Proxy kind whose value is none of {tensormap, generic}}} - nvvm.fence.proxy {kind = #nvvm.proxy_kind} + nvvm.fence.proxy #nvvm.proxy_kind llvm.return } @@ -26,7 +26,7 @@ llvm.func @fence_proxy() { llvm.func @fence_proxy() { // expected-error @below {{attribute 'kind' failed to satisfy constraint: Proxy kind whose value is none of {tensormap, generic}}} - nvvm.fence.proxy {kind = #nvvm.proxy_kind} + nvvm.fence.proxy #nvvm.proxy_kind llvm.return } @@ -34,7 +34,7 @@ llvm.func @fence_proxy() { llvm.func @fence_proxy() { // expected-error @below {{async_shared fence requires space attribute}} - nvvm.fence.proxy {kind = #nvvm.proxy_kind} + nvvm.fence.proxy #nvvm.proxy_kind llvm.return } @@ -42,7 +42,7 @@ llvm.func @fence_proxy() { llvm.func @fence_proxy() { // expected-error @below {{only async_shared fence can have space attribute}} - nvvm.fence.proxy {kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + nvvm.fence.proxy #nvvm.proxy_kind, space = #nvvm.shared_space llvm.return } @@ -66,7 +66,7 @@ llvm.func @fence_proxy_release() { llvm.func @fence_proxy_sync_restrict() { // expected-error @below {{attribute 'order' failed to satisfy constraint: NVVM Memory Ordering kind whose value is one of {acquire, release}}} - nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order} + nvvm.fence.proxy.sync_restrict #nvvm.mem_order llvm.return } @@ -74,8 +74,8 @@ llvm.func @fence_proxy_sync_restrict() { llvm.func @fence_proxy_sync_restrict() { // expected-error @below {{only async is supported for to_proxy attribute}} - nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order, toProxy = #nvvm.proxy_kind, - fromProxy = #nvvm.proxy_kind} + nvvm.fence.proxy.sync_restrict #nvvm.mem_order + from_proxy = #nvvm.proxy_kind to_proxy = #nvvm.proxy_kind llvm.return } @@ -83,7 +83,7 @@ llvm.func @fence_proxy_sync_restrict() { llvm.func @fence_proxy_sync_restrict() { // expected-error @below {{only generic is support for from_proxy attribute}} - nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order, toProxy = #nvvm.proxy_kind, - fromProxy = #nvvm.proxy_kind} + nvvm.fence.proxy.sync_restrict #nvvm.mem_order + from_proxy = #nvvm.proxy_kind to_proxy = #nvvm.proxy_kind llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/fence.mlir b/mlir/test/Target/LLVMIR/nvvm/fence.mlir index 0ab4cb74b8f54..aa25fef72c5cc 100644 --- a/mlir/test/Target/LLVMIR/nvvm/fence.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/fence.mlir @@ -10,9 +10,9 @@ llvm.func @llvm_nvvm_fence_sc_cluster() { // CHECK-LABEL: @nvvm_fence_sync_restrict llvm.func @nvvm_fence_sync_restrict() { // CHECK: call void @llvm.nvvm.fence.acquire.sync_restrict.space.cluster.scope.cluster() - nvvm.fence.sync_restrict {order = #nvvm.mem_order} + nvvm.fence.sync_restrict #nvvm.mem_order // CHECK: call void @llvm.nvvm.fence.release.sync_restrict.space.cta.scope.cluster() - nvvm.fence.sync_restrict {order = #nvvm.mem_order} + nvvm.fence.sync_restrict #nvvm.mem_order llvm.return } @@ -26,28 +26,28 @@ llvm.func @fence_mbarrier_init() { // CHECK-LABEL: @nvvm_fence_proxy llvm.func @nvvm_fence_proxy() { // CHECK: call void @llvm.nvvm.fence.proxy.alias() - nvvm.fence.proxy {kind = #nvvm.proxy_kind} + nvvm.fence.proxy #nvvm.proxy_kind // CHECK: call void @llvm.nvvm.fence.proxy.async() - nvvm.fence.proxy {kind = #nvvm.proxy_kind} + nvvm.fence.proxy #nvvm.proxy_kind // CHECK: call void @llvm.nvvm.fence.proxy.async.global() - nvvm.fence.proxy {kind = #nvvm.proxy_kind} + nvvm.fence.proxy #nvvm.proxy_kind // CHECK: call void @llvm.nvvm.fence.proxy.async.shared_cta() - nvvm.fence.proxy {kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + nvvm.fence.proxy #nvvm.proxy_kind, space = #nvvm.shared_space // CHECK: call void @llvm.nvvm.fence.proxy.async.shared_cluster() - nvvm.fence.proxy {kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + nvvm.fence.proxy #nvvm.proxy_kind, space = #nvvm.shared_space llvm.return } // CHECK-LABEL: @nvvm_fence_proxy_sync_restrict llvm.func @nvvm_fence_proxy_sync_restrict() { // CHECK: call void @llvm.nvvm.fence.proxy.async_generic.acquire.sync_restrict.space.cluster.scope.cluster() - nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order} + nvvm.fence.proxy.sync_restrict #nvvm.mem_order // CHECK: call void @llvm.nvvm.fence.proxy.async_generic.release.sync_restrict.space.cta.scope.cluster() - nvvm.fence.proxy.sync_restrict {order = #nvvm.mem_order} + nvvm.fence.proxy.sync_restrict #nvvm.mem_order llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/fma/fma.mlir b/mlir/test/Target/LLVMIR/nvvm/fma/fma.mlir index 236175daff21e..aa18c57e93ab4 100644 --- a/mlir/test/Target/LLVMIR/nvvm/fma/fma.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/fma/fma.mlir @@ -12,14 +12,14 @@ llvm.func @fma_f16(%a: f16, %b: f16, %c: f16) -> f16 { // CHECK-NEXT: %11 = call half @llvm.nvvm.fma.rn.oob.relu.f16(half %0, half %1, half %10) // CHECK-NEXT: ret half %11 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : f16 - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : f16 - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f16 - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : f16 - %f4 = nvvm.fma %a, %b, %f3 {rnd = #nvvm.fp_rnd_mode, relu = true} : f16 - %f5 = nvvm.fma %a, %b, %f4 {rnd = #nvvm.fp_rnd_mode, relu = true, ftz = true} : f16 - %f6 = nvvm.fma %a, %b, %f5 {rnd = #nvvm.fp_rnd_mode, oob = true} : f16 - %f7 = nvvm.fma %a, %b, %f6 {rnd = #nvvm.fp_rnd_mode, oob = true, relu = true} : f16 + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : f16 + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : f16 + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f16 + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : f16 + %f4 = nvvm.fma %a, %b, %f3 <{rnd = #nvvm.fp_rnd_mode, relu = true}> : f16 + %f5 = nvvm.fma %a, %b, %f4 <{rnd = #nvvm.fp_rnd_mode, relu = true, ftz = true}> : f16 + %f6 = nvvm.fma %a, %b, %f5 <{rnd = #nvvm.fp_rnd_mode, oob = true}> : f16 + %f7 = nvvm.fma %a, %b, %f6 <{rnd = #nvvm.fp_rnd_mode, oob = true, relu = true}> : f16 llvm.return %f7 : f16 } @@ -31,10 +31,10 @@ llvm.func @fma_bf16(%a: bf16, %b: bf16, %c: bf16) -> bf16 { // CHECK-NEXT: %7 = call bfloat @llvm.nvvm.fma.rn.oob.relu.bf16(bfloat %0, bfloat %1, bfloat %6) // CHECK-NEXT: ret bfloat %7 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : bf16 - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, relu = true} : bf16 - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, oob = true} : bf16 - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, oob = true, relu = true} : bf16 + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : bf16 + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, relu = true}> : bf16 + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, oob = true}> : bf16 + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, oob = true, relu = true}> : bf16 llvm.return %f3 : bf16 } @@ -46,10 +46,10 @@ llvm.func @fma_f32_rn(%a: f32, %b: f32, %c: f32) -> f32 { // CHECK-NEXT: %7 = call float @llvm.nvvm.fma.rn.ftz.sat.f(float %0, float %1, float %6) // CHECK-NEXT: ret float %7 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : f32 - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : f32 - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : f32 + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : f32 + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : f32 llvm.return %f3 : f32 } @@ -61,10 +61,10 @@ llvm.func @fma_f32_rm(%a: f32, %b: f32, %c: f32) -> f32 { // CHECK-NEXT: %7 = call float @llvm.nvvm.fma.rm.ftz.sat.f(float %0, float %1, float %6) // CHECK-NEXT: ret float %7 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : f32 - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : f32 - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : f32 + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : f32 + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : f32 llvm.return %f3 : f32 } @@ -76,10 +76,10 @@ llvm.func @fma_f32_rp(%a: f32, %b: f32, %c: f32) -> f32 { // CHECK-NEXT: %7 = call float @llvm.nvvm.fma.rp.ftz.sat.f(float %0, float %1, float %6) // CHECK-NEXT: ret float %7 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : f32 - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : f32 - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : f32 + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : f32 + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : f32 llvm.return %f3 : f32 } @@ -91,10 +91,10 @@ llvm.func @fma_f32_rz(%a: f32, %b: f32, %c: f32) -> f32 { // CHECK-NEXT: %7 = call float @llvm.nvvm.fma.rz.ftz.sat.f(float %0, float %1, float %6) // CHECK-NEXT: ret float %7 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : f32 - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : f32 - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : f32 + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : f32 + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : f32 llvm.return %f3 : f32 } @@ -106,9 +106,9 @@ llvm.func @fma_f64(%a: f64, %b: f64, %c: f64) -> f64 { // CHECK-NEXT: %7 = call double @llvm.nvvm.fma.rz.d(double %0, double %1, double %6) // CHECK-NEXT: ret double %7 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : f64 - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode} : f64 - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode} : f64 - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode} : f64 + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : f64 + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode}> : f64 + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode}> : f64 + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode}> : f64 llvm.return %f3 : f64 } diff --git a/mlir/test/Target/LLVMIR/nvvm/fma/fma_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/fma/fma_invalid.mlir index ea92b707b65de..8adbb5913b573 100644 --- a/mlir/test/Target/LLVMIR/nvvm/fma/fma_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/fma/fma_invalid.mlir @@ -4,7 +4,7 @@ llvm.func @fma_invalid_rnd_mode(%a : f16, %b : f16, %c : f16) -> f16 { // expected-error@+1 {{rounding mode must be specified}} - %f1 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : f16 + %f1 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : f16 llvm.return %f1 : f16 } @@ -12,7 +12,7 @@ llvm.func @fma_invalid_rnd_mode(%a : f16, %b : f16, %c : f16) -> f16 { llvm.func @fma_invalid_sat_mode(%a : f16, %b : f16, %c : f16) -> f16 { // expected-error@+1 {{attribute 'sat' failed to satisfy constraint: Describes the saturation mode whose value is one of {none, sat}}} - %f1 = nvvm.fma %a, %b, %c {sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode} : f16 + %f1 = nvvm.fma %a, %b, %c <{sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode}> : f16 llvm.return %f1 : f16 } @@ -20,7 +20,7 @@ llvm.func @fma_invalid_sat_mode(%a : f16, %b : f16, %c : f16) -> f16 { llvm.func @fma_invalid_relu_sat(%a : f16, %b : f16, %c : f16) -> f16 { // expected-error@+1 {{relu and saturation are not supported together}} - %f1 = nvvm.fma %a, %b, %c {relu = true, sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode} : f16 + %f1 = nvvm.fma %a, %b, %c <{relu = true, sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode}> : f16 llvm.return %f1 : f16 } @@ -28,7 +28,7 @@ llvm.func @fma_invalid_relu_sat(%a : f16, %b : f16, %c : f16) -> f16 { llvm.func @fma_invalid_oob_sat(%a : f16, %b : f16, %c : f16) -> f16 { // expected-error@+1 {{oob is not supported with saturation or FTZ}} - %f1 = nvvm.fma %a, %b, %c {oob = true, sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode} : f16 + %f1 = nvvm.fma %a, %b, %c <{oob = true, sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode}> : f16 llvm.return %f1 : f16 } @@ -36,7 +36,7 @@ llvm.func @fma_invalid_oob_sat(%a : f16, %b : f16, %c : f16) -> f16 { llvm.func @fma_invalid_oob_f64(%a : f64, %b : f64, %c : f64) -> f64 { // expected-error@+1 {{relu and oob are only supported for f16 and bf16}} - %f1 = nvvm.fma %a, %b, %c {oob = true, rnd = #nvvm.fp_rnd_mode} : f64 + %f1 = nvvm.fma %a, %b, %c <{oob = true, rnd = #nvvm.fp_rnd_mode}> : f64 llvm.return %f1 : f64 } @@ -44,7 +44,7 @@ llvm.func @fma_invalid_oob_f64(%a : f64, %b : f64, %c : f64) -> f64 { llvm.func @fma_invalid_relu_oob(%a : f32, %b : f32, %c : f32) -> f32 { // expected-error@+1 {{relu and oob are only supported for f16 and bf16}} - %f1 = nvvm.fma %a, %b, %c {relu = true, rnd = #nvvm.fp_rnd_mode} : f32 + %f1 = nvvm.fma %a, %b, %c <{relu = true, rnd = #nvvm.fp_rnd_mode}> : f32 llvm.return %f1 : f32 } @@ -52,7 +52,7 @@ llvm.func @fma_invalid_relu_oob(%a : f32, %b : f32, %c : f32) -> f32 { llvm.func @fma_invalid_ftz_sat_f64(%a : f64, %b : f64, %c : f64) -> f64 { // expected-error@+1 {{FTZ and saturation are not supported for f64 type}} - %f1 = nvvm.fma %a, %b, %c {ftz = true, sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode} : f64 + %f1 = nvvm.fma %a, %b, %c <{ftz = true, sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode}> : f64 llvm.return %f1 : f64 } @@ -60,7 +60,7 @@ llvm.func @fma_invalid_ftz_sat_f64(%a : f64, %b : f64, %c : f64) -> f64 { llvm.func @fma_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>, %c : vector<2xf16>) -> vector<2xf16> { // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16>}} - %f1 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %f1 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> llvm.return %f1 : vector<2xf16> } @@ -68,7 +68,7 @@ llvm.func @fma_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>, %c llvm.func @fma_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>, %c : vector<2xbf16>) -> vector<2xbf16> { // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16>}} - %f1 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %f1 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return %f1 : vector<2xbf16> } @@ -76,7 +76,7 @@ llvm.func @fma_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>, llvm.func @fma_invalid_ftz_v2bf16(%a : vector<2xbf16>, %b : vector<2xbf16>, %c : vector<2xbf16>) -> vector<2xbf16> { // expected-error@+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16>}} - %f1 = nvvm.fma %a, %b, %c {ftz = true, rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %f1 = nvvm.fma %a, %b, %c <{ftz = true, rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return %f1 : vector<2xbf16> } @@ -84,6 +84,6 @@ llvm.func @fma_invalid_ftz_v2bf16(%a : vector<2xbf16>, %b : vector<2xbf16>, %c : llvm.func @fma_invalid_sat_v2bf16(%a : vector<2xbf16>, %b : vector<2xbf16>, %c : vector<2xbf16>) -> vector<2xbf16> { // expected-error@+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16>}} - %f1 = nvvm.fma %a, %b, %c {sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %f1 = nvvm.fma %a, %b, %c <{sat = #nvvm.sat_mode, rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return %f1 : vector<2xbf16> } diff --git a/mlir/test/Target/LLVMIR/nvvm/fma/fma_vector.mlir b/mlir/test/Target/LLVMIR/nvvm/fma/fma_vector.mlir index 020bdcfc27705..8f79121c04795 100644 --- a/mlir/test/Target/LLVMIR/nvvm/fma/fma_vector.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/fma/fma_vector.mlir @@ -12,14 +12,14 @@ llvm.func @fma_f16(%a: vector<2xf16>, %b: vector<2xf16>, %c: vector<2xf16>) -> v // CHECK-NEXT: %11 = call <2 x half> @llvm.nvvm.fma.rn.oob.relu.v2f16(<2 x half> %0, <2 x half> %1, <2 x half> %10) // CHECK-NEXT: ret <2 x half> %11 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : vector<2xf16> - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf16> - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : vector<2xf16> - %f4 = nvvm.fma %a, %b, %f3 {rnd = #nvvm.fp_rnd_mode, relu = true} : vector<2xf16> - %f5 = nvvm.fma %a, %b, %f4 {rnd = #nvvm.fp_rnd_mode, relu = true, ftz = true} : vector<2xf16> - %f6 = nvvm.fma %a, %b, %f5 {rnd = #nvvm.fp_rnd_mode, oob = true} : vector<2xf16> - %f7 = nvvm.fma %a, %b, %f6 {rnd = #nvvm.fp_rnd_mode, oob = true, relu = true} : vector<2xf16> + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : vector<2xf16> + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf16> + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : vector<2xf16> + %f4 = nvvm.fma %a, %b, %f3 <{rnd = #nvvm.fp_rnd_mode, relu = true}> : vector<2xf16> + %f5 = nvvm.fma %a, %b, %f4 <{rnd = #nvvm.fp_rnd_mode, relu = true, ftz = true}> : vector<2xf16> + %f6 = nvvm.fma %a, %b, %f5 <{rnd = #nvvm.fp_rnd_mode, oob = true}> : vector<2xf16> + %f7 = nvvm.fma %a, %b, %f6 <{rnd = #nvvm.fp_rnd_mode, oob = true, relu = true}> : vector<2xf16> llvm.return %f7 : vector<2xf16> } @@ -31,10 +31,10 @@ llvm.func @fma_bf16(%a: vector<2xbf16>, %b: vector<2xbf16>, %c: vector<2xbf16>) // CHECK-NEXT: %7 = call <2 x bfloat> @llvm.nvvm.fma.rn.oob.relu.v2bf16(<2 x bfloat> %0, <2 x bfloat> %1, <2 x bfloat> %6) // CHECK-NEXT: ret <2 x bfloat> %7 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, relu = true} : vector<2xbf16> - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, oob = true} : vector<2xbf16> - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, oob = true, relu = true} : vector<2xbf16> + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, relu = true}> : vector<2xbf16> + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, oob = true}> : vector<2xbf16> + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, oob = true, relu = true}> : vector<2xbf16> llvm.return %f3 : vector<2xbf16> } @@ -82,10 +82,10 @@ llvm.func @fma_f32_rn(%a: vector<2xf32>, %b: vector<2xf32>, %c: vector<2xf32>) - // CHECK-NEXT: %43 = insertelement <2 x float> %38, float %42, i32 1 // CHECK-NEXT: ret <2 x float> %43 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : vector<2xf32> - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : vector<2xf32> + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : vector<2xf32> + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : vector<2xf32> llvm.return %f3 : vector<2xf32> } @@ -133,10 +133,10 @@ llvm.func @fma_f32_rm(%a: vector<2xf32>, %b: vector<2xf32>, %c: vector<2xf32>) - // CHECK-NEXT: %43 = insertelement <2 x float> %38, float %42, i32 1 // CHECK-NEXT: ret <2 x float> %43 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : vector<2xf32> - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : vector<2xf32> + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : vector<2xf32> + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : vector<2xf32> llvm.return %f3 : vector<2xf32> } @@ -184,10 +184,10 @@ llvm.func @fma_f32_rp(%a: vector<2xf32>, %b: vector<2xf32>, %c: vector<2xf32>) - // CHECK-NEXT: %43 = insertelement <2 x float> %38, float %42, i32 1 // CHECK-NEXT: ret <2 x float> %43 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : vector<2xf32> - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : vector<2xf32> + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : vector<2xf32> + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : vector<2xf32> llvm.return %f3 : vector<2xf32> } @@ -235,10 +235,10 @@ llvm.func @fma_f32_rz(%a: vector<2xf32>, %b: vector<2xf32>, %c: vector<2xf32>) - // CHECK-NEXT: %43 = insertelement <2 x float> %38, float %42, i32 1 // CHECK-NEXT: ret <2 x float> %43 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode, ftz = true} : vector<2xf32> - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true} : vector<2xf32> + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode, ftz = true}> : vector<2xf32> + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz = true}> : vector<2xf32> llvm.return %f3 : vector<2xf32> } @@ -286,9 +286,9 @@ llvm.func @fma_f64(%a: vector<2xf64>, %b: vector<2xf64>, %c: vector<2xf64>) -> v // CHECK-NEXT: %43 = insertelement <2 x double> %38, double %42, i32 1 // CHECK-NEXT: ret <2 x double> %43 // CHECK-NEXT: } - %f0 = nvvm.fma %a, %b, %c {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> - %f1 = nvvm.fma %a, %b, %f0 {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> - %f2 = nvvm.fma %a, %b, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> - %f3 = nvvm.fma %a, %b, %f2 {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + %f0 = nvvm.fma %a, %b, %c <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> + %f1 = nvvm.fma %a, %b, %f0 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> + %f2 = nvvm.fma %a, %b, %f1 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> + %f3 = nvvm.fma %a, %b, %f2 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> llvm.return %f3 : vector<2xf64> } diff --git a/mlir/test/Target/LLVMIR/nvvm/invalid_convert_fp16x2.mlir b/mlir/test/Target/LLVMIR/nvvm/invalid_convert_fp16x2.mlir index 37756c8737f11..e59b863a41009 100644 --- a/mlir/test/Target/LLVMIR/nvvm/invalid_convert_fp16x2.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/invalid_convert_fp16x2.mlir @@ -4,7 +4,7 @@ llvm.func @nvvm_cvt_f32x2_to_f16x2_invalid_rounding(%a : f32, %b : f32) { // expected-error @below {{Only RN, RZ, and RS rounding modes are supported for conversions from f32x2 to f16x2.}} - %res = nvvm.convert.f32x2.to.f16x2 %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %res = nvvm.convert.f32x2.to.f16x2 %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> llvm.return } @@ -12,7 +12,7 @@ llvm.func @nvvm_cvt_f32x2_to_f16x2_invalid_rounding(%a : f32, %b : f32) { llvm.func @nvvm_cvt_f32x2_to_f16x2_invalid_rbits_1(%a : f32, %b : f32) { // expected-error @below {{random_bits is required for RS rounding mode.}} - %res = nvvm.convert.f32x2.to.f16x2 %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %res = nvvm.convert.f32x2.to.f16x2 %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> llvm.return } @@ -20,7 +20,7 @@ llvm.func @nvvm_cvt_f32x2_to_f16x2_invalid_rbits_1(%a : f32, %b : f32) { llvm.func @nvvm_cvt_f32x2_to_f16x2_invalid_rbits_2(%a : f32, %b : f32, %rbits : i32) { // expected-error @below {{random_bits not supported for RN and RZ rounding modes.}} - %res = nvvm.convert.f32x2.to.f16x2 %a, %b, %rbits {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %res = nvvm.convert.f32x2.to.f16x2 %a, %b, %rbits <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> llvm.return } @@ -28,7 +28,7 @@ llvm.func @nvvm_cvt_f32x2_to_f16x2_invalid_rbits_2(%a : f32, %b : f32, %rbits : llvm.func @nvvm_cvt_f32x2_to_bf16x2_invalid_rounding(%a : f32, %b : f32) { // expected-error @below {{Only RN, RZ, and RS rounding modes are supported for conversions from f32x2 to bf16x2.}} - %res = nvvm.convert.f32x2.to.bf16x2 %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %res = nvvm.convert.f32x2.to.bf16x2 %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return } @@ -36,12 +36,12 @@ llvm.func @nvvm_cvt_f32x2_to_bf16x2_invalid_rounding(%a : f32, %b : f32) { llvm.func @nvvm_cvt_f32x2_to_bf16x2_invalid_rbits_1(%a : f32, %b : f32) { // expected-error @below {{random_bits is required for RS rounding mode.}} - %res = nvvm.convert.f32x2.to.bf16x2 %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %res = nvvm.convert.f32x2.to.bf16x2 %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return } llvm.func @nvvm_cvt_f32x2_to_bf16x2_invalid_rbits_2(%a : f32, %b : f32, %rbits : i32) { // expected-error @below {{random_bits not supported for RN and RZ rounding modes.}} - %res = nvvm.convert.f32x2.to.bf16x2 %a, %b, %rbits {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %res = nvvm.convert.f32x2.to.bf16x2 %a, %b, %rbits <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_arr_drop_expect_tx.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_arr_drop_expect_tx.mlir index 4b3cafec08a39..910bdddf97854 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_arr_drop_expect_tx.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_arr_drop_expect_tx.mlir @@ -17,12 +17,12 @@ llvm.func @mbarrier_arrive_drop_expect_tx_generic(%barrier: !llvm.ptr, %txcount // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount : !llvm.ptr, i32 -> i64 - %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr, i32 -> i64 - %2 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr, i32 -> i64 + %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr, i32 -> i64 + %2 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr, i32 -> i64 - %3 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr, i32 -> i64 - %4 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr, i32 -> i64 - %5 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr, i32 -> i64 + %3 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{relaxed = true}> : !llvm.ptr, i32 -> i64 + %4 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr, i32 -> i64 + %5 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr, i32 -> i64 llvm.return } @@ -37,12 +37,12 @@ llvm.func @mbarrier_arrive_drop_expect_tx_shared(%barrier: !llvm.ptr<3>, %txcoun // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount : !llvm.ptr<3>, i32 -> i64 - %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i64 - %2 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i64 + %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i64 + %2 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i64 - %3 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr<3>, i32 -> i64 - %4 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<3>, i32 -> i64 - %5 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<3>, i32 -> i64 + %3 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{relaxed = true}> : !llvm.ptr<3>, i32 -> i64 + %4 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<3>, i32 -> i64 + %5 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<3>, i32 -> i64 llvm.return } @@ -57,12 +57,12 @@ llvm.func @mbarrier_arrive_drop_expect_tx_shared_cluster(%barrier: !llvm.ptr<7>, // CHECK-NEXT: ret void // CHECK-NEXT: } nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{relaxed = true}> : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<7>, i32 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_arr_expect_tx.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_arr_expect_tx.mlir index b5389bdd30267..ffa8fab9cd7d6 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_arr_expect_tx.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_arr_expect_tx.mlir @@ -17,12 +17,12 @@ llvm.func @mbarrier_arrive_expect_tx_generic(%barrier: !llvm.ptr, %txcount : i32 // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr, i32 -> i64 - %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr, i32 -> i64 - %2 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr, i32 -> i64 + %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr, i32 -> i64 + %2 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr, i32 -> i64 - %3 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr, i32 -> i64 - %4 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr, i32 -> i64 - %5 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr, i32 -> i64 + %3 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{relaxed = true}> : !llvm.ptr, i32 -> i64 + %4 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr, i32 -> i64 + %5 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr, i32 -> i64 llvm.return } @@ -37,12 +37,12 @@ llvm.func @mbarrier_arrive_expect_tx_shared(%barrier: !llvm.ptr<3>, %txcount : i // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr<3>, i32 -> i64 - %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i64 - %2 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i64 + %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i64 + %2 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i64 - %3 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr<3>, i32 -> i64 - %4 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<3>, i32 -> i64 - %5 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<3>, i32 -> i64 + %3 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{relaxed = true}> : !llvm.ptr<3>, i32 -> i64 + %4 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<3>, i32 -> i64 + %5 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<3>, i32 -> i64 llvm.return } @@ -57,12 +57,12 @@ llvm.func @mbarrier_arrive_expect_tx_shared_cluster(%barrier: !llvm.ptr<7>, %txc // CHECK-NEXT: ret void // CHECK-NEXT: } nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<7>, i32 - nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{relaxed = true}> : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<7>, i32 + nvvm.mbarrier.arrive.expect_tx %barrier, %txcount <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<7>, i32 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_arrive.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_arrive.mlir index 96c910b193f12..4af21c1116683 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_arrive.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_arrive.mlir @@ -22,13 +22,13 @@ llvm.func @mbarrier_arrive_generic(%barrier: !llvm.ptr, %count : i32) { // CHECK-NEXT: } %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr -> i64 %1 = nvvm.mbarrier.arrive %barrier, %count : !llvm.ptr -> i64 - %2 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr -> i64 - %3 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr -> i64 + %2 = nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr -> i64 + %3 = nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr -> i64 - %4 = nvvm.mbarrier.arrive %barrier {relaxed = true} : !llvm.ptr -> i64 - %5 = nvvm.mbarrier.arrive %barrier, %count {relaxed = true} : !llvm.ptr -> i64 - %6 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr -> i64 - %7 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr -> i64 + %4 = nvvm.mbarrier.arrive %barrier <{relaxed = true}> : !llvm.ptr -> i64 + %5 = nvvm.mbarrier.arrive %barrier, %count <{relaxed = true}> : !llvm.ptr -> i64 + %6 = nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr -> i64 + %7 = nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr -> i64 llvm.return } @@ -46,13 +46,13 @@ llvm.func @mbarrier_arrive_shared(%barrier: !llvm.ptr<3>, %count : i32) { // CHECK-NEXT: } %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr<3> -> i64 %1 = nvvm.mbarrier.arrive %barrier, %count : !llvm.ptr<3> -> i64 - %2 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr<3> -> i64 - %3 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr<3> -> i64 + %2 = nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3> -> i64 + %3 = nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3> -> i64 - %4 = nvvm.mbarrier.arrive %barrier {relaxed = true} : !llvm.ptr<3> -> i64 - %5 = nvvm.mbarrier.arrive %barrier, %count {relaxed = true} : !llvm.ptr<3> -> i64 - %6 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<3> -> i64 - %7 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<3> -> i64 + %4 = nvvm.mbarrier.arrive %barrier <{relaxed = true}> : !llvm.ptr<3> -> i64 + %5 = nvvm.mbarrier.arrive %barrier, %count <{relaxed = true}> : !llvm.ptr<3> -> i64 + %6 = nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<3> -> i64 + %7 = nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<3> -> i64 llvm.return } @@ -70,13 +70,13 @@ llvm.func @mbarrier_arrive_shared_cluster(%barrier: !llvm.ptr<7>, %count : i32) // CHECK-NEXT: } nvvm.mbarrier.arrive %barrier : !llvm.ptr<7> nvvm.mbarrier.arrive %barrier, %count : !llvm.ptr<7> - nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr<7> - nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr<7> + nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7> + nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7> - nvvm.mbarrier.arrive %barrier {relaxed = true} : !llvm.ptr<7> - nvvm.mbarrier.arrive %barrier, %count {relaxed = true} : !llvm.ptr<7> - nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<7> - nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<7> + nvvm.mbarrier.arrive %barrier <{relaxed = true}> : !llvm.ptr<7> + nvvm.mbarrier.arrive %barrier, %count <{relaxed = true}> : !llvm.ptr<7> + nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<7> + nvvm.mbarrier.arrive %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<7> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_arrive_drop.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_arrive_drop.mlir index c345c5d69edad..65906e6fc7d3b 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_arrive_drop.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_arrive_drop.mlir @@ -22,13 +22,13 @@ llvm.func @mbarrier_arrive_drop_generic(%barrier: !llvm.ptr, %count : i32) { // CHECK-NEXT: } %0 = nvvm.mbarrier.arrive_drop %barrier : !llvm.ptr -> i64 %1 = nvvm.mbarrier.arrive_drop %barrier, %count : !llvm.ptr -> i64 - %2 = nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr -> i64 - %3 = nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr -> i64 + %2 = nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr -> i64 + %3 = nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr -> i64 - %4 = nvvm.mbarrier.arrive_drop %barrier {relaxed = true} : !llvm.ptr -> i64 - %5 = nvvm.mbarrier.arrive_drop %barrier, %count {relaxed = true} : !llvm.ptr -> i64 - %6 = nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr -> i64 - %7 = nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr -> i64 + %4 = nvvm.mbarrier.arrive_drop %barrier <{relaxed = true}> : !llvm.ptr -> i64 + %5 = nvvm.mbarrier.arrive_drop %barrier, %count <{relaxed = true}> : !llvm.ptr -> i64 + %6 = nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr -> i64 + %7 = nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr -> i64 llvm.return } @@ -46,13 +46,13 @@ llvm.func @mbarrier_arrive_drop_shared(%barrier: !llvm.ptr<3>, %count : i32) { // CHECK-NEXT: } %0 = nvvm.mbarrier.arrive_drop %barrier : !llvm.ptr<3> -> i64 %1 = nvvm.mbarrier.arrive_drop %barrier, %count : !llvm.ptr<3> -> i64 - %2 = nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr<3> -> i64 - %3 = nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr<3> -> i64 + %2 = nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3> -> i64 + %3 = nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3> -> i64 - %4 = nvvm.mbarrier.arrive_drop %barrier {relaxed = true} : !llvm.ptr<3> -> i64 - %5 = nvvm.mbarrier.arrive_drop %barrier, %count {relaxed = true} : !llvm.ptr<3> -> i64 - %6 = nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<3> -> i64 - %7 = nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<3> -> i64 + %4 = nvvm.mbarrier.arrive_drop %barrier <{relaxed = true}> : !llvm.ptr<3> -> i64 + %5 = nvvm.mbarrier.arrive_drop %barrier, %count <{relaxed = true}> : !llvm.ptr<3> -> i64 + %6 = nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<3> -> i64 + %7 = nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<3> -> i64 llvm.return } @@ -70,13 +70,13 @@ llvm.func @mbarrier_arrive_drop_shared_cluster(%barrier: !llvm.ptr<7>, %count : // CHECK-NEXT: } nvvm.mbarrier.arrive_drop %barrier : !llvm.ptr<7> nvvm.mbarrier.arrive_drop %barrier, %count : !llvm.ptr<7> - nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr<7> - nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope} : !llvm.ptr<7> + nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7> + nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7> - nvvm.mbarrier.arrive_drop %barrier {relaxed = true} : !llvm.ptr<7> - nvvm.mbarrier.arrive_drop %barrier, %count {relaxed = true} : !llvm.ptr<7> - nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<7> - nvvm.mbarrier.arrive_drop %barrier, %count {scope = #nvvm.mem_scope, relaxed = true} : !llvm.ptr<7> + nvvm.mbarrier.arrive_drop %barrier <{relaxed = true}> : !llvm.ptr<7> + nvvm.mbarrier.arrive_drop %barrier, %count <{relaxed = true}> : !llvm.ptr<7> + nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<7> + nvvm.mbarrier.arrive_drop %barrier, %count <{scope = #nvvm.mem_scope, relaxed = true}> : !llvm.ptr<7> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_complete_tx.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_complete_tx.mlir index 99289fa03b22c..b1fb4e1c89fd4 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_complete_tx.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_complete_tx.mlir @@ -8,8 +8,8 @@ llvm.func @mbarrier_complete_tx_shared(%barrier: !llvm.ptr<3>, %tx_count : i32) // CHECK-NEXT: ret void // CHECK-NEXT: } nvvm.mbarrier.complete_tx %barrier, %tx_count : !llvm.ptr<3>, i32 - nvvm.mbarrier.complete_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 - nvvm.mbarrier.complete_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 + nvvm.mbarrier.complete_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 + nvvm.mbarrier.complete_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 llvm.return } @@ -22,8 +22,8 @@ llvm.func @mbarrier_complete_tx_shared_cluster(%barrier: !llvm.ptr<7>, %tx_count // CHECK-NEXT: ret void // CHECK-NEXT: } nvvm.mbarrier.complete_tx %barrier, %tx_count : !llvm.ptr<7>, i32 - nvvm.mbarrier.complete_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 - nvvm.mbarrier.complete_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 + nvvm.mbarrier.complete_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 + nvvm.mbarrier.complete_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 llvm.return } \ No newline at end of file diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_expect_tx.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_expect_tx.mlir index dad7237e2f4cc..74872b7228426 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_expect_tx.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_expect_tx.mlir @@ -8,8 +8,8 @@ llvm.func @mbarrier_expect_tx_shared(%barrier: !llvm.ptr<3>, %tx_count : i32) { // CHECK-NEXT: ret void // CHECK-NEXT: } nvvm.mbarrier.expect_tx %barrier, %tx_count : !llvm.ptr<3>, i32 - nvvm.mbarrier.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 - nvvm.mbarrier.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 + nvvm.mbarrier.expect_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 + nvvm.mbarrier.expect_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 llvm.return } @@ -22,8 +22,8 @@ llvm.func @mbarrier_expect_tx_shared_cluster(%barrier: !llvm.ptr<7>, %tx_count : // CHECK-NEXT: ret void // CHECK-NEXT: } nvvm.mbarrier.expect_tx %barrier, %tx_count : !llvm.ptr<7>, i32 - nvvm.mbarrier.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 - nvvm.mbarrier.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 + nvvm.mbarrier.expect_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 + nvvm.mbarrier.expect_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 llvm.return } \ No newline at end of file diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_init.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_init.mlir index 9c1d1cc0cdc31..632208320b1cc 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_init.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_init.mlir @@ -9,9 +9,9 @@ llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.p // CHECK-NEXT: ret void // CHECK-NEXT: } nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr - nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr + nvvm.cp.async.mbarrier.arrive %bar_gen <{noinc = true}> : !llvm.ptr nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> - nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> + nvvm.cp.async.mbarrier.arrive %bar_shared <{noinc = true}> : !llvm.ptr<3> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_invalid.mlir index 4a7776d86b28e..1947999d33540 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_invalid.mlir @@ -12,7 +12,7 @@ llvm.func @mbarrier_arrive_ret_check(%barrier: !llvm.ptr<7>) { llvm.func @mbarrier_arrive_invalid_scope(%barrier: !llvm.ptr<7>) { // expected-error @below {{mbarrier scope must be either CTA or Cluster}} - %0 = nvvm.mbarrier.arrive %barrier {scope = #nvvm.mem_scope} : !llvm.ptr<7> -> i64 + %0 = nvvm.mbarrier.arrive %barrier <{scope = #nvvm.mem_scope}> : !llvm.ptr<7> -> i64 llvm.return } @@ -28,7 +28,7 @@ llvm.func @mbarrier_arrive_drop_ret_check(%barrier: !llvm.ptr<7>) { llvm.func @mbarrier_arrive_drop_invalid_scope(%barrier: !llvm.ptr<7>) { // expected-error @below {{mbarrier scope must be either CTA or Cluster}} - %0 = nvvm.mbarrier.arrive_drop %barrier {scope = #nvvm.mem_scope} : !llvm.ptr<7> -> i64 + %0 = nvvm.mbarrier.arrive_drop %barrier <{scope = #nvvm.mem_scope}> : !llvm.ptr<7> -> i64 llvm.return } @@ -36,7 +36,7 @@ llvm.func @mbarrier_arrive_drop_invalid_scope(%barrier: !llvm.ptr<7>) { llvm.func @mbarrier_expect_tx_scope(%barrier: !llvm.ptr<7>, %tx_count: i32) { // expected-error @below {{mbarrier scope must be either CTA or Cluster}} - nvvm.mbarrier.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 + nvvm.mbarrier.expect_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 llvm.return } @@ -44,7 +44,7 @@ llvm.func @mbarrier_expect_tx_scope(%barrier: !llvm.ptr<7>, %tx_count: i32) { llvm.func @mbarrier_complete_tx_scope(%barrier: !llvm.ptr<3>, %tx_count: i32) { // expected-error @below {{mbarrier scope must be either CTA or Cluster}} - nvvm.mbarrier.complete_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 + nvvm.mbarrier.complete_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 llvm.return } @@ -52,7 +52,7 @@ llvm.func @mbarrier_complete_tx_scope(%barrier: !llvm.ptr<3>, %tx_count: i32) { llvm.func @mbarrier_arr_expect_tx(%barrier: !llvm.ptr<3>, %tx_count: i32) { // expected-error @below {{mbarrier scope must be either CTA or Cluster}} - %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i64 + %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i64 llvm.return } @@ -60,7 +60,7 @@ llvm.func @mbarrier_arr_expect_tx(%barrier: !llvm.ptr<3>, %tx_count: i32) { llvm.func @mbarrier_arr_expect_tx_cluster(%barrier: !llvm.ptr<7>, %tx_count: i32) { // expected-error @below {{mbarrier in shared_cluster space cannot return any value}} - %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 -> i64 + %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 -> i64 llvm.return } @@ -76,7 +76,7 @@ llvm.func @init_mbarrier_arrive_expect_tx_asm_ret(%barrier : !llvm.ptr<3>, %txco llvm.func @init_mbarrier_arrive_expect_tx_asm_relaxed(%barrier : !llvm.ptr<3>, %txcount : i32, %pred : i1) { // expected-error @below {{mbarrier with relaxed semantics is not supported when using predicate}} - nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred {relaxed = true} : !llvm.ptr<3>, i32, i1 + nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred <{relaxed = true}> : !llvm.ptr<3>, i32, i1 llvm.return } @@ -84,7 +84,7 @@ llvm.func @init_mbarrier_arrive_expect_tx_asm_relaxed(%barrier : !llvm.ptr<3>, % llvm.func @init_mbarrier_arrive_expect_tx_asm_cta(%barrier : !llvm.ptr<3>, %txcount : i32, %pred : i1) { // expected-error @below {{mbarrier scope must be CTA when using predicate}} - nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32, i1 + nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32, i1 llvm.return } @@ -100,7 +100,7 @@ llvm.func @init_mbarrier_arrive_expect_tx_asm_cluster(%barrier : !llvm.ptr<7>, % llvm.func @mbarrier_arr_drop_expect_tx(%barrier: !llvm.ptr<3>, %tx_count: i32) { // expected-error @below {{mbarrier scope must be either CTA or Cluster}} - %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i64 + %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i64 llvm.return } @@ -108,7 +108,7 @@ llvm.func @mbarrier_arr_drop_expect_tx(%barrier: !llvm.ptr<3>, %tx_count: i32) { llvm.func @mbarrier_arr_drop_expect_tx_cluster(%barrier: !llvm.ptr<7>, %tx_count: i32) { // expected-error @below {{mbarrier in shared_cluster space cannot return any value}} - %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope} : !llvm.ptr<7>, i32 -> i64 + %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %tx_count <{scope = #nvvm.mem_scope}> : !llvm.ptr<7>, i32 -> i64 llvm.return } @@ -116,7 +116,7 @@ llvm.func @mbarrier_arr_drop_expect_tx_cluster(%barrier: !llvm.ptr<7>, %tx_count llvm.func @mbarrier_test_wait(%barrier: !llvm.ptr<3>, %phase: i32) { // expected-error @below {{mbarrier scope must be either CTA or Cluster}} - %1 = nvvm.mbarrier.test.wait %barrier, %phase {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i1 + %1 = nvvm.mbarrier.test.wait %barrier, %phase <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i1 llvm.return } @@ -124,7 +124,7 @@ llvm.func @mbarrier_test_wait(%barrier: !llvm.ptr<3>, %phase: i32) { llvm.func @mbarrier_try_wait(%barrier: !llvm.ptr<3>, %phase: i32) { // expected-error @below {{mbarrier scope must be either CTA or Cluster}} - %1 = nvvm.mbarrier.try_wait %barrier, %phase {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %phase <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i1 llvm.return } @@ -132,7 +132,7 @@ llvm.func @mbarrier_try_wait(%barrier: !llvm.ptr<3>, %phase: i32) { llvm.func @mbarrier_try_wait_with_timelimit(%barrier: !llvm.ptr<3>, %phase: i32, %ticks: i32) { // expected-error @below {{mbarrier scope must be either CTA or Cluster}} - %1 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32, i32 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32, i32 -> i1 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_test_wait.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_test_wait.mlir index 21ab72eeab167..9ff86c1076d27 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_test_wait.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_test_wait.mlir @@ -13,10 +13,10 @@ llvm.func @mbarrier_test_wait_state(%barrier: !llvm.ptr, %state : i64) { // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.test.wait %barrier, %state : !llvm.ptr, i64 -> i1 - %1 = nvvm.mbarrier.test.wait %barrier, %state {scope = #nvvm.mem_scope} : !llvm.ptr, i64 -> i1 + %1 = nvvm.mbarrier.test.wait %barrier, %state <{scope = #nvvm.mem_scope}> : !llvm.ptr, i64 -> i1 - %2 = nvvm.mbarrier.test.wait %barrier, %state {relaxed = true} : !llvm.ptr, i64 -> i1 - %3 = nvvm.mbarrier.test.wait %barrier, %state {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr, i64 -> i1 + %2 = nvvm.mbarrier.test.wait %barrier, %state <{relaxed = true}> : !llvm.ptr, i64 -> i1 + %3 = nvvm.mbarrier.test.wait %barrier, %state <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr, i64 -> i1 llvm.return } @@ -29,10 +29,10 @@ llvm.func @mbarrier_test_wait_shared_state(%barrier: !llvm.ptr<3>, %state : i64) // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.test.wait %barrier, %state : !llvm.ptr<3>, i64 -> i1 - %1 = nvvm.mbarrier.test.wait %barrier, %state {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i64 -> i1 + %1 = nvvm.mbarrier.test.wait %barrier, %state <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i64 -> i1 - %2 = nvvm.mbarrier.test.wait %barrier, %state {relaxed = true} : !llvm.ptr<3>, i64 -> i1 - %3 = nvvm.mbarrier.test.wait %barrier, %state {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr<3>, i64 -> i1 + %2 = nvvm.mbarrier.test.wait %barrier, %state <{relaxed = true}> : !llvm.ptr<3>, i64 -> i1 + %3 = nvvm.mbarrier.test.wait %barrier, %state <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i64 -> i1 llvm.return } @@ -49,10 +49,10 @@ llvm.func @mbarrier_test_wait_phase(%barrier: !llvm.ptr, %phase : i32) { // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.test.wait %barrier, %phase : !llvm.ptr, i32 -> i1 - %1 = nvvm.mbarrier.test.wait %barrier, %phase {scope = #nvvm.mem_scope} : !llvm.ptr, i32 -> i1 + %1 = nvvm.mbarrier.test.wait %barrier, %phase <{scope = #nvvm.mem_scope}> : !llvm.ptr, i32 -> i1 - %2 = nvvm.mbarrier.test.wait %barrier, %phase {relaxed = true} : !llvm.ptr, i32 -> i1 - %3 = nvvm.mbarrier.test.wait %barrier, %phase {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr, i32 -> i1 + %2 = nvvm.mbarrier.test.wait %barrier, %phase <{relaxed = true}> : !llvm.ptr, i32 -> i1 + %3 = nvvm.mbarrier.test.wait %barrier, %phase <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr, i32 -> i1 llvm.return } @@ -65,9 +65,9 @@ llvm.func @mbarrier_test_wait_shared_phase(%barrier: !llvm.ptr<3>, %phase : i32) // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.test.wait %barrier, %phase : !llvm.ptr<3>, i32 -> i1 - %1 = nvvm.mbarrier.test.wait %barrier, %phase {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i1 + %1 = nvvm.mbarrier.test.wait %barrier, %phase <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i1 - %2 = nvvm.mbarrier.test.wait %barrier, %phase {relaxed = true} : !llvm.ptr<3>, i32 -> i1 - %3 = nvvm.mbarrier.test.wait %barrier, %phase {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i1 + %2 = nvvm.mbarrier.test.wait %barrier, %phase <{relaxed = true}> : !llvm.ptr<3>, i32 -> i1 + %3 = nvvm.mbarrier.test.wait %barrier, %phase <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i1 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_try_wait.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_try_wait.mlir index 18aaf0e451e20..28ac571547273 100644 --- a/mlir/test/Target/LLVMIR/nvvm/mbar_try_wait.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/mbar_try_wait.mlir @@ -13,10 +13,10 @@ llvm.func @mbarrier_try_wait_state(%barrier: !llvm.ptr, %state : i64) { // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.try_wait %barrier, %state : !llvm.ptr, i64 -> i1 - %1 = nvvm.mbarrier.try_wait %barrier, %state {scope = #nvvm.mem_scope} : !llvm.ptr, i64 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %state <{scope = #nvvm.mem_scope}> : !llvm.ptr, i64 -> i1 - %2 = nvvm.mbarrier.try_wait %barrier, %state {relaxed = true} : !llvm.ptr, i64 -> i1 - %3 = nvvm.mbarrier.try_wait %barrier, %state {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr, i64 -> i1 + %2 = nvvm.mbarrier.try_wait %barrier, %state <{relaxed = true}> : !llvm.ptr, i64 -> i1 + %3 = nvvm.mbarrier.try_wait %barrier, %state <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr, i64 -> i1 llvm.return } @@ -34,10 +34,10 @@ llvm.func @mbarrier_try_wait_state_with_timelimit(%barrier: !llvm.ptr, %state : // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.try_wait %barrier, %state, %ticks : !llvm.ptr, i64, i32 -> i1 - %1 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {scope = #nvvm.mem_scope} : !llvm.ptr, i64, i32 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %state, %ticks <{scope = #nvvm.mem_scope}> : !llvm.ptr, i64, i32 -> i1 - %2 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {relaxed = true} : !llvm.ptr, i64, i32 -> i1 - %3 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr, i64, i32 -> i1 + %2 = nvvm.mbarrier.try_wait %barrier, %state, %ticks <{relaxed = true}> : !llvm.ptr, i64, i32 -> i1 + %3 = nvvm.mbarrier.try_wait %barrier, %state, %ticks <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr, i64, i32 -> i1 llvm.return } @@ -51,10 +51,10 @@ llvm.func @mbarrier_try_wait_shared_state(%barrier: !llvm.ptr<3>, %state : i64) // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.try_wait %barrier, %state : !llvm.ptr<3>, i64 -> i1 - %1 = nvvm.mbarrier.try_wait %barrier, %state {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i64 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %state <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i64 -> i1 - %2 = nvvm.mbarrier.try_wait %barrier, %state {relaxed = true} : !llvm.ptr<3>, i64 -> i1 - %3 = nvvm.mbarrier.try_wait %barrier, %state {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr<3>, i64 -> i1 + %2 = nvvm.mbarrier.try_wait %barrier, %state <{relaxed = true}> : !llvm.ptr<3>, i64 -> i1 + %3 = nvvm.mbarrier.try_wait %barrier, %state <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i64 -> i1 llvm.return } @@ -67,10 +67,10 @@ llvm.func @mbarrier_try_wait_shared_state_with_timelimit(%barrier: !llvm.ptr<3>, // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.try_wait %barrier, %state, %ticks : !llvm.ptr<3>, i64, i32 -> i1 - %1 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i64, i32 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %state, %ticks <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i64, i32 -> i1 - %2 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {relaxed = true} : !llvm.ptr<3>, i64, i32 -> i1 - %3 = nvvm.mbarrier.try_wait %barrier, %state, %ticks {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr<3>, i64, i32 -> i1 + %2 = nvvm.mbarrier.try_wait %barrier, %state, %ticks <{relaxed = true}> : !llvm.ptr<3>, i64, i32 -> i1 + %3 = nvvm.mbarrier.try_wait %barrier, %state, %ticks <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i64, i32 -> i1 llvm.return } @@ -87,10 +87,10 @@ llvm.func @mbarrier_try_wait_phase(%barrier: !llvm.ptr, %phase : i32) { // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.try_wait %barrier, %phase : !llvm.ptr, i32 -> i1 - %1 = nvvm.mbarrier.try_wait %barrier, %phase {scope = #nvvm.mem_scope} : !llvm.ptr, i32 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %phase <{scope = #nvvm.mem_scope}> : !llvm.ptr, i32 -> i1 - %2 = nvvm.mbarrier.try_wait %barrier, %phase {relaxed = true} : !llvm.ptr, i32 -> i1 - %3 = nvvm.mbarrier.try_wait %barrier, %phase {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr, i32 -> i1 + %2 = nvvm.mbarrier.try_wait %barrier, %phase <{relaxed = true}> : !llvm.ptr, i32 -> i1 + %3 = nvvm.mbarrier.try_wait %barrier, %phase <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr, i32 -> i1 llvm.return } @@ -107,10 +107,10 @@ llvm.func @mbarrier_try_wait_phase_with_timelimit(%barrier: !llvm.ptr, %phase : // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks : !llvm.ptr, i32, i32 -> i1 - %1 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {scope = #nvvm.mem_scope} : !llvm.ptr, i32, i32 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks <{scope = #nvvm.mem_scope}> : !llvm.ptr, i32, i32 -> i1 - %2 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {relaxed = true} : !llvm.ptr, i32, i32 -> i1 - %3 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr, i32, i32 -> i1 + %2 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks <{relaxed = true}> : !llvm.ptr, i32, i32 -> i1 + %3 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr, i32, i32 -> i1 llvm.return } @@ -123,10 +123,10 @@ llvm.func @mbarrier_try_wait_shared_phase(%barrier: !llvm.ptr<3>, %phase : i32) // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.try_wait %barrier, %phase : !llvm.ptr<3>, i32 -> i1 - %1 = nvvm.mbarrier.try_wait %barrier, %phase {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %phase <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i1 - %2 = nvvm.mbarrier.try_wait %barrier, %phase {relaxed = true} : !llvm.ptr<3>, i32 -> i1 - %3 = nvvm.mbarrier.try_wait %barrier, %phase {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32 -> i1 + %2 = nvvm.mbarrier.try_wait %barrier, %phase <{relaxed = true}> : !llvm.ptr<3>, i32 -> i1 + %3 = nvvm.mbarrier.try_wait %barrier, %phase <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32 -> i1 llvm.return } @@ -139,9 +139,9 @@ llvm.func @mbarrier_try_wait_shared_phase_with_timelimit(%barrier: !llvm.ptr<3>, // CHECK-NEXT: ret void // CHECK-NEXT: } %0 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks : !llvm.ptr<3>, i32, i32 -> i1 - %1 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32, i32 -> i1 + %1 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks <{scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32, i32 -> i1 - %2 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {relaxed = true} : !llvm.ptr<3>, i32, i32 -> i1 - %3 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks {relaxed = true, scope = #nvvm.mem_scope} : !llvm.ptr<3>, i32, i32 -> i1 + %2 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks <{relaxed = true}> : !llvm.ptr<3>, i32, i32 -> i1 + %3 = nvvm.mbarrier.try_wait %barrier, %phase, %ticks <{relaxed = true, scope = #nvvm.mem_scope}> : !llvm.ptr<3>, i32, i32 -> i1 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir index f91f68852fb7e..641f8f03c6fa6 100644 --- a/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir @@ -4,7 +4,7 @@ llvm.func @redux_sync_i32_with_abs(%value: i32, %offset: i32) { // expected-error@+1 {{abs attribute is supported only for f32 type}} - %res = nvvm.redux.sync add %value, %offset {abs = true}: i32 -> i32 + %res = nvvm.redux.sync add %value, %offset abs = true : i32 -> i32 llvm.return } @@ -12,7 +12,7 @@ llvm.func @redux_sync_i32_with_abs(%value: i32, %offset: i32) { llvm.func @redux_sync_i32_with_nan(%value: i32, %offset: i32) { // expected-error@+1 {{nan attribute is supported only for f32 type}} - %res = nvvm.redux.sync add %value, %offset {nan = true}: i32 -> i32 + %res = nvvm.redux.sync add %value, %offset nan = true : i32 -> i32 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir index f2ccfe71a3f23..0af71ef377d55 100644 --- a/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir @@ -18,5 +18,5 @@ func.func @nvvm_invalid_shfl_invalid_return_type_1(%arg0 : i32, %arg1 : f32, %ar func.func @nvvm_invalid_shfl_invalid_return_type_2(%arg0 : i32, %arg1 : f32, %arg2 : i32, %arg3 : i32) { // expected-error@+1 {{expected first element in the returned struct to be of type 'f32' but got 'i32' instead}} - %0 = nvvm.shfl.sync bfly %arg0, %arg1, %arg2, %arg3 {return_value_and_is_valid} : f32 -> !llvm.struct<(i32, i1)> + %0 = nvvm.shfl.sync bfly %arg0, %arg1, %arg2, %arg3 <{return_value_and_is_valid}> : f32 -> !llvm.struct<(i32, i1)> } diff --git a/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir b/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir index e21bcfb42023d..d404992bb8b94 100644 --- a/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir @@ -14,9 +14,9 @@ llvm.func @fsub_f16_f16(%a : f16, %b : f16) -> f16 { // CHECK-NEXT: ret half %10 // CHECK-NEXT: } %f1 = nvvm.subf %a, %b : f16 - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : f16 - %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f16 - %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f16 + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : f16 + %f3 = nvvm.subf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f16 + %f4 = nvvm.subf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f16 llvm.return %f4 : f16 } @@ -30,7 +30,7 @@ llvm.func @fsub_bf16_bf16(%a : bf16, %b : bf16) -> bf16 { // CHECK-NEXT: ret bfloat %6 // CHECK-NEXT: } %f1 = nvvm.subf %a, %b : bf16 - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : bf16 + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : bf16 llvm.return %f2 : bf16 } @@ -74,22 +74,22 @@ llvm.func @fsub_f32_f32(%a : f32, %b : f32) -> f32 { // CHECK-NEXT: ret float %36 // CHECK-NEXT: } %f1 = nvvm.subf %a, %b : f32 - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : f32 - %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 - %f5 = nvvm.subf %f4, %f4 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 - %f6 = nvvm.subf %f5, %f5 {rnd = #nvvm.fp_rnd_mode} : f32 - %f7 = nvvm.subf %f6, %f6 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f8 = nvvm.subf %f7, %f7 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 - %f9 = nvvm.subf %f8, %f8 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 - %f10 = nvvm.subf %f9, %f9 {rnd = #nvvm.fp_rnd_mode} : f32 - %f11 = nvvm.subf %f10, %f10 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f12 = nvvm.subf %f11, %f11 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 - %f13 = nvvm.subf %f12, %f12 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 - %f14 = nvvm.subf %f13, %f13 {rnd = #nvvm.fp_rnd_mode} : f32 - %f15 = nvvm.subf %f14, %f14 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 - %f16 = nvvm.subf %f15, %f15 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 - %f17 = nvvm.subf %f16, %f16 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f3 = nvvm.subf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f4 = nvvm.subf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : f32 + %f5 = nvvm.subf %f4, %f4 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f32 + %f6 = nvvm.subf %f5, %f5 <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f7 = nvvm.subf %f6, %f6 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f8 = nvvm.subf %f7, %f7 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : f32 + %f9 = nvvm.subf %f8, %f8 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f32 + %f10 = nvvm.subf %f9, %f9 <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f11 = nvvm.subf %f10, %f10 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f12 = nvvm.subf %f11, %f11 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : f32 + %f13 = nvvm.subf %f12, %f12 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f32 + %f14 = nvvm.subf %f13, %f13 <{rnd = #nvvm.fp_rnd_mode}> : f32 + %f15 = nvvm.subf %f14, %f14 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : f32 + %f16 = nvvm.subf %f15, %f15 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : f32 + %f17 = nvvm.subf %f16, %f16 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : f32 llvm.return %f17 : f32 } @@ -109,9 +109,9 @@ llvm.func @fsub_f64_f64(%a : f64, %b : f64) -> f64 { // CHECK-NEXT: ret double %12 // CHECK-NEXT: } %f1 = nvvm.subf %a, %b : f64 - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : f64 - %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode} : f64 - %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode} : f64 - %f5 = nvvm.subf %f4, %f4 {rnd = #nvvm.fp_rnd_mode} : f64 + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : f64 + %f3 = nvvm.subf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode}> : f64 + %f4 = nvvm.subf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode}> : f64 + %f5 = nvvm.subf %f4, %f4 <{rnd = #nvvm.fp_rnd_mode}> : f64 llvm.return %f5 : f64 } diff --git a/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir index bf4bbd19cb396..055273739fa78 100644 --- a/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir @@ -4,7 +4,7 @@ llvm.func @subf_invalid_sat_mode(%a : f16, %b : f16) -> f16 { // expected-error@+1 {{ attribute 'sat' failed to satisfy constraint: Describes the saturation mode whose value is one of {none, sat}}} - %f1 = nvvm.subf %a, %b {sat = #nvvm.sat_mode} : f16 + %f1 = nvvm.subf %a, %b <{sat = #nvvm.sat_mode}> : f16 llvm.return %f1 : f16 } @@ -12,7 +12,7 @@ llvm.func @subf_invalid_sat_mode(%a : f16, %b : f16) -> f16 { llvm.func @subf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 { // expected-error@+1 {{FTZ and saturation are not supported for additions/subtractions involving f64 type}} - %f1 = nvvm.subf %a, %b {sat = #nvvm.sat_mode, ftz=true} : f64 + %f1 = nvvm.subf %a, %b <{sat = #nvvm.sat_mode, ftz=true}> : f64 llvm.return %f1 : f64 } @@ -20,7 +20,7 @@ llvm.func @subf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 { llvm.func @subf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 { // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}} - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : f16 + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : f16 llvm.return %f1 : f16 } @@ -28,7 +28,7 @@ llvm.func @subf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 { llvm.func @subf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> { // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}} - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> llvm.return %f1 : vector<2xf16> } @@ -36,7 +36,7 @@ llvm.func @subf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) - llvm.func @subf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 { // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}} - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : bf16 + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : bf16 llvm.return %f1 : bf16 } @@ -44,7 +44,7 @@ llvm.func @subf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 { llvm.func @subf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> { // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}} - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return %f1 : vector<2xbf16> } @@ -52,7 +52,7 @@ llvm.func @subf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16> llvm.func @subf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 { // expected-error@+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions/subtractions}} - %f1 = nvvm.subf %a, %b {sat = #nvvm.sat_mode, ftz=true} : bf16 + %f1 = nvvm.subf %a, %b <{sat = #nvvm.sat_mode, ftz=true}> : bf16 llvm.return %f1 : bf16 } @@ -62,6 +62,6 @@ llvm.func @subf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 { // available. llvm.func @subf_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 { // expected-error@+1 {{FTZ with no saturation is not supported for f16 and vector<2xf16> additions/subtractions}} - %f1 = nvvm.subf %a, %b {ftz=true} : f16 + %f1 = nvvm.subf %a, %b <{ftz=true}> : f16 llvm.return %f1 : f16 } diff --git a/mlir/test/Target/LLVMIR/nvvm/subf/subf_vector.mlir b/mlir/test/Target/LLVMIR/nvvm/subf/subf_vector.mlir index 3dca3fc41fa34..dc44411a70880 100644 --- a/mlir/test/Target/LLVMIR/nvvm/subf/subf_vector.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/subf/subf_vector.mlir @@ -14,9 +14,9 @@ llvm.func @subf_vector_f16_f16(%a : vector<2xf16>, %b : vector<2xf16>) -> vector // CHECK-NEXT: ret <2 x half> %4 // CHECK-NEXT: } %f1 = nvvm.subf %a, %b : vector<2xf16> - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> - %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf16> - %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf16> + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf16> + %f3 = nvvm.subf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf16> + %f4 = nvvm.subf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf16> llvm.return %f1 : vector<2xf16> } @@ -30,7 +30,7 @@ llvm.func @subf_vector_bf16_bf16(%a : vector<2xbf16>, %b : vector<2xbf16>) -> ve // CHECK-NEXT: ret <2 x bfloat> %6 // CHECK-NEXT: } %f1 = nvvm.subf %a, %b : vector<2xbf16> - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xbf16> llvm.return %f2 : vector<2xbf16> } @@ -85,10 +85,10 @@ llvm.func @subf_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vec // CHECK-NEXT: ret <2 x float> %38 // CHECK-NEXT: } %f1 = nvvm.subf %a, %b : vector<2xf32> - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> - %f5 = nvvm.subf %f4, %f4 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f3 = nvvm.subf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f4 = nvvm.subf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : vector<2xf32> + %f5 = nvvm.subf %f4, %f4 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf32> llvm.return %f4 : vector<2xf32> } @@ -132,10 +132,10 @@ llvm.func @subf_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vec // CHECK-NEXT: %38 = insertelement <2 x float> %34, float %37, i32 1 // CHECK-NEXT: ret <2 x float> %38 // CHECK-NEXT: } - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> - %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.subf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : vector<2xf32> + %f4 = nvvm.subf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf32> llvm.return %f4 : vector<2xf32> } @@ -179,10 +179,10 @@ llvm.func @subf_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vec // CHECK-NEXT: %38 = insertelement <2 x float> %34, float %37, i32 1 // CHECK-NEXT: ret <2 x float> %38 // CHECK-NEXT: } - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> - %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.subf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : vector<2xf32> + %f4 = nvvm.subf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf32> llvm.return %f4 : vector<2xf32> } @@ -226,10 +226,10 @@ llvm.func @subf_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vec // CHECK-NEXT: %38 = insertelement <2 x float> %34, float %37, i32 1 // CHECK-NEXT: ret <2 x float> %38 // CHECK-NEXT: } - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> - %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> - %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf32> + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : vector<2xf32> + %f3 = nvvm.subf %f2, %f2 <{rnd = #nvvm.fp_rnd_mode, ftz=true}> : vector<2xf32> + %f4 = nvvm.subf %f3, %f3 <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true}> : vector<2xf32> llvm.return %f4 : vector<2xf32> } @@ -257,7 +257,7 @@ llvm.func @subf_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vec // CHECK-NEXT: ret <2 x double> %20 // CHECK-NEXT: } %f1 = nvvm.subf %a, %b : vector<2xf64> - %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + %f2 = nvvm.subf %f1, %f1 <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> llvm.return %f2 : vector<2xf64> } @@ -274,7 +274,7 @@ llvm.func @subf_vector_f64_f64_rm(%a : vector<2xf64>, %b : vector<2xf64>) -> vec // CHECK-NEXT: %11 = insertelement <2 x double> %7, double %10, i32 1 // CHECK-NEXT: ret <2 x double> %11 // CHECK-NEXT: } - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> llvm.return %f1 : vector<2xf64> } @@ -291,7 +291,7 @@ llvm.func @subf_vector_f64_f64_rp(%a : vector<2xf64>, %b : vector<2xf64>) -> vec // CHECK-NEXT: %11 = insertelement <2 x double> %7, double %10, i32 1 // CHECK-NEXT: ret <2 x double> %11 // CHECK-NEXT: } - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> llvm.return %f1 : vector<2xf64> } @@ -308,6 +308,6 @@ llvm.func @subf_vector_f64_f64_rz(%a : vector<2xf64>, %b : vector<2xf64>) -> vec // CHECK-NEXT: %11 = insertelement <2 x double> %7, double %10, i32 1 // CHECK-NEXT: ret <2 x double> %11 // CHECK-NEXT: } - %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + %f1 = nvvm.subf %a, %b <{rnd = #nvvm.fp_rnd_mode}> : vector<2xf64> llvm.return %f1 : vector<2xf64> } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir index a8f80296f20ae..8befe53eaddb8 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir @@ -6,7 +6,7 @@ llvm.func @llvm_nvvm_tcgen05_alloc(%addr : !llvm.ptr, %ncols : i32) { nvvm.tcgen05.alloc %addr, %ncols : !llvm.ptr, i32 // CHECK-LLVM: call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %{{.*}}, i32 %{{.*}}) - nvvm.tcgen05.alloc %addr, %ncols {group = #nvvm.cta_group} : !llvm.ptr, i32 + nvvm.tcgen05.alloc %addr, %ncols <{group = #nvvm.cta_group}> : !llvm.ptr, i32 llvm.return } @@ -16,7 +16,7 @@ llvm.func @llvm_nvvm_tcgen05_alloc_shared(%addr : !llvm.ptr<3>, %ncols : i32) { nvvm.tcgen05.alloc %addr, %ncols : !llvm.ptr<3>, i32 // CHECK-LLVM: call void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %{{.*}}, i32 %{{.*}}) - nvvm.tcgen05.alloc %addr, %ncols {group = #nvvm.cta_group} : !llvm.ptr<3>, i32 + nvvm.tcgen05.alloc %addr, %ncols <{group = #nvvm.cta_group}> : !llvm.ptr<3>, i32 llvm.return } @@ -26,7 +26,7 @@ llvm.func @llvm_nvvm_tcgen05_dealloc(%addr : !llvm.ptr<6>, %ncols : i32) { nvvm.tcgen05.dealloc %addr, %ncols : !llvm.ptr<6>, i32 // CHECK-LLVM: call void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %{{.*}}, i32 %{{.*}}) - nvvm.tcgen05.dealloc %addr, %ncols {group = #nvvm.cta_group} : !llvm.ptr<6>, i32 + nvvm.tcgen05.dealloc %addr, %ncols <{group = #nvvm.cta_group}> : !llvm.ptr<6>, i32 llvm.return } @@ -36,6 +36,6 @@ llvm.func @llvm_nvvm_tcgen05_relinquish_alloc_permit() { nvvm.tcgen05.relinquish_alloc_permit // CHECK-LLVM: call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2() - nvvm.tcgen05.relinquish_alloc_permit {group = #nvvm.cta_group} + nvvm.tcgen05.relinquish_alloc_permit <{group = #nvvm.cta_group}> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-commit.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-commit.mlir index 60475bf64ae7a..b6c6543506b66 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-commit.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-commit.mlir @@ -6,13 +6,13 @@ llvm.func @llvm_nvvm_tcgen05_commit_generic(%barrier : !llvm.ptr, %cta_mask : i1 nvvm.tcgen05.commit %barrier : !llvm.ptr // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.cg2(ptr %{{.*}}) - nvvm.tcgen05.commit %barrier {group = #nvvm.cta_group} : !llvm.ptr + nvvm.tcgen05.commit %barrier <{group = #nvvm.cta_group}> : !llvm.ptr // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.cg1(ptr %{{.*}}, i16 %{{.*}}) nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask : !llvm.ptr, i16 // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %{{.*}}, i16 %{{.*}}) - nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask {group = #nvvm.cta_group} : !llvm.ptr, i16 + nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask <{group = #nvvm.cta_group}> : !llvm.ptr, i16 llvm.return } @@ -22,12 +22,12 @@ llvm.func @llvm_nvvm_tcgen05_commit_shared(%barrier : !llvm.ptr<3>, %cta_mask : nvvm.tcgen05.commit %barrier : !llvm.ptr<3> // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %{{.*}}) - nvvm.tcgen05.commit %barrier {group = #nvvm.cta_group} : !llvm.ptr<3> + nvvm.tcgen05.commit %barrier <{group = #nvvm.cta_group}> : !llvm.ptr<3> // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %{{.*}}, i16 %{{.*}}) nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask : !llvm.ptr<3>, i16 // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %{{.*}}, i16 %{{.*}}) - nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask {group = #nvvm.cta_group} : !llvm.ptr<3>, i16 + nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask <{group = #nvvm.cta_group}> : !llvm.ptr<3>, i16 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir index 237b15ba36739..f06e1fc8941e5 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-cp.mlir @@ -3,101 +3,101 @@ // CHECK-LABEL: @nvvm_tcgen05_cp_128x256b llvm.func @nvvm_tcgen05_cp_128x256b(%taddr : !llvm.ptr<6>, %smem_desc : i64) { // CHECK: call void @llvm.nvvm.tcgen05.cp.128x256b.cg1(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc {shape = #nvvm.tcgen05_cp_shape} + nvvm.tcgen05.cp %taddr, %smem_desc <{shape = #nvvm.tcgen05_cp_shape}> // CHECK: call void @llvm.nvvm.tcgen05.cp.128x256b.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc {shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group} + nvvm.tcgen05.cp %taddr, %smem_desc <{shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group}> // CHECK: call void @llvm.nvvm.tcgen05.cp.128x256b.b4x16_p64.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> // CHECK: call void @llvm.nvvm.tcgen05.cp.128x256b.b6x16_p32.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> llvm.return } // CHECK-LABEL: @nvvm_tcgen05_cp_4x256b llvm.func @nvvm_tcgen05_cp_4x256b(%taddr : !llvm.ptr<6>, %smem_desc : i64) { // CHECK: call void @llvm.nvvm.tcgen05.cp.4x256b.cg1(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc {shape = #nvvm.tcgen05_cp_shape} + nvvm.tcgen05.cp %taddr, %smem_desc <{shape = #nvvm.tcgen05_cp_shape}> // CHECK: call void @llvm.nvvm.tcgen05.cp.4x256b.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc {shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group} + nvvm.tcgen05.cp %taddr, %smem_desc <{shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group}> // CHECK: call void @llvm.nvvm.tcgen05.cp.4x256b.b4x16_p64.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> // CHECK: call void @llvm.nvvm.tcgen05.cp.4x256b.b6x16_p32.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> llvm.return } // CHECK-LABEL: @nvvm_tcgen05_cp_128x128b llvm.func @nvvm_tcgen05_cp_128x128b(%taddr : !llvm.ptr<6>, %smem_desc : i64) { // CHECK: call void @llvm.nvvm.tcgen05.cp.128x128b.cg1(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc {shape = #nvvm.tcgen05_cp_shape} + nvvm.tcgen05.cp %taddr, %smem_desc <{shape = #nvvm.tcgen05_cp_shape}> // CHECK: call void @llvm.nvvm.tcgen05.cp.128x128b.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc {shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group} + nvvm.tcgen05.cp %taddr, %smem_desc <{shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group}> // CHECK: call void @llvm.nvvm.tcgen05.cp.128x128b.b4x16_p64.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> // CHECK: call void @llvm.nvvm.tcgen05.cp.128x128b.b6x16_p32.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> llvm.return } // CHECK-LABEL: @nvvm_tcgen05_cp_64x128b llvm.func @nvvm_tcgen05_cp_64x128b(%taddr : !llvm.ptr<6>, %smem_desc : i64) { // CHECK: call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg1(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, multicast = #nvvm.tcgen05_cp_multicast - } + }> // CHECK: call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, multicast = #nvvm.tcgen05_cp_multicast - } + }> // CHECK: call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b4x16_p64.cg1(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, multicast = #nvvm.tcgen05_cp_multicast, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> // CHECK: call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b6x16_p32.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, multicast = #nvvm.tcgen05_cp_multicast, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> llvm.return } @@ -105,32 +105,32 @@ llvm.func @nvvm_tcgen05_cp_64x128b(%taddr : !llvm.ptr<6>, %smem_desc : i64) { // CHECK-LABEL: @nvvm_tcgen05_cp_32x128b llvm.func @nvvm_tcgen05_cp_32x128b(%taddr : !llvm.ptr<6>, %smem_desc : i64) { // CHECK: call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg1(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, multicast = #nvvm.tcgen05_cp_multicast - } + }> // CHECK: call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, multicast = #nvvm.tcgen05_cp_multicast - } + }> // CHECK: call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b4x16_p64.cg2(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, multicast = #nvvm.tcgen05_cp_multicast, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> // CHECK: call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b6x16_p32.cg1(ptr addrspace(6) %{{.*}}, i64 %{{.*}}) - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, group = #nvvm.cta_group, multicast = #nvvm.tcgen05_cp_multicast, srcFormat = #nvvm.tcgen05_cp_src_fmt - } + }> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir index 1b93f20c15b99..bb733fe3a64db 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir @@ -4,6 +4,6 @@ llvm.func @nvvm_tcgen05_ld_32x32b_offset(%tmemAddr : !llvm.ptr<6>, %offset : i64) -> () { // expected-error@+1 {{offset argument is only supported for shape 16x32bx2}} - %ldv2 = nvvm.tcgen05.ld %tmemAddr, %offset { pack, shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32> + %ldv2 = nvvm.tcgen05.ld %tmemAddr, %offset pack <{shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-red-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-red-invalid.mlir index 07734bdaded1e..9f26e183e5681 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-red-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-red-invalid.mlir @@ -2,7 +2,7 @@ llvm.func @tcgen05_ld_red_add(%addr : !llvm.ptr<6>) { // expected-error @below {{only min and max reduction kinds are supported}} - %data, %redval = nvvm.tcgen05.ld.red add %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32>, i32 + %data, %redval = nvvm.tcgen05.ld.red add %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32>, i32 llvm.return } @@ -10,7 +10,7 @@ llvm.func @tcgen05_ld_red_add(%addr : !llvm.ptr<6>) { llvm.func @tcgen05_ld_red_and(%addr : !llvm.ptr<6>) { // expected-error @below {{only min and max reduction kinds are supported}} - %data, %redval = nvvm.tcgen05.ld.red and %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32>, i32 + %data, %redval = nvvm.tcgen05.ld.red and %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32>, i32 llvm.return } @@ -18,7 +18,7 @@ llvm.func @tcgen05_ld_red_and(%addr : !llvm.ptr<6>) { llvm.func @tcgen05_ld_red_same_types(%addr : !llvm.ptr<6>) { // expected-error @below {{type of reduction value and element type of vector data should match}} - %data, %redval = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32>, f32 + %data, %redval = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32>, f32 llvm.return } @@ -26,7 +26,7 @@ llvm.func @tcgen05_ld_red_same_types(%addr : !llvm.ptr<6>) { llvm.func @tcgen05_ld_red_i32_abs_nan(%addr : !llvm.ptr<6>) { // expected-error @below {{abs or nan is only applicable for f32 type}} - %data, %redval = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<2 x i32>, i32 + %data, %redval = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<2 x i32>, i32 llvm.return } @@ -35,7 +35,7 @@ llvm.func @tcgen05_ld_red_i32_abs_nan(%addr : !llvm.ptr<6>) { llvm.func @tcgen05_ld_red_i32_abs_nan(%addr : !llvm.ptr<6>) { // expected-error @below {{abs or nan is only applicable for f32 type}} - %data, %redval = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, abs} : vector<2 x i32>, i32 + %data, %redval = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs}> : vector<2 x i32>, i32 llvm.return } @@ -43,6 +43,6 @@ llvm.func @tcgen05_ld_red_i32_abs_nan(%addr : !llvm.ptr<6>) { llvm.func @tcgen05_ld_red_i32_abs_nan(%addr : !llvm.ptr<6>) { // expected-error @below {{abs or nan is only applicable for f32 type}} - %data, %redval = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, nan} : vector<2 x i32>, i32 + %data, %redval = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, nan}> : vector<2 x i32>, i32 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-red.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-red.mlir index 492c22ecf2fd3..bf94784a0c2c4 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-red.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-red.mlir @@ -46,33 +46,33 @@ llvm.func @nvvm_tcgen05_ld_32x32b_min(%addr : !llvm.ptr<6>) { // CHECK: %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 0 // CHECK: %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 1 - %data, %redval = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32>, i32 + %data, %redval = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32>, i32 - %data1, %redval1 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32>, i32 + %data1, %redval1 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32>, i32 - %data2, %redval2 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32>, i32 + %data2, %redval2 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32>, i32 - %data3, %redval3 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32>, i32 + %data3, %redval3 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32>, i32 - %data4, %redval4 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32>, i32 + %data4, %redval4 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32>, i32 - %data5, %redval5 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32>, i32 + %data5, %redval5 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32>, i32 - %data6, %redval6 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32>, i32 + %data6, %redval6 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32>, i32 - %data7, %redval7 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x f32>, f32 + %data7, %redval7 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x f32>, f32 - %data8, %redval8 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x f32>, f32 + %data8, %redval8 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x f32>, f32 - %data9, %redval9 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x f32>, f32 + %data9, %redval9 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x f32>, f32 - %data10, %redval10 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x f32>, f32 + %data10, %redval10 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x f32>, f32 - %data11, %redval11 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x f32>, f32 + %data11, %redval11 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x f32>, f32 - %data12, %redval12 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x f32>, f32 + %data12, %redval12 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x f32>, f32 - %data13, %redval13 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x f32>, f32 + %data13, %redval13 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x f32>, f32 llvm.return } @@ -122,33 +122,33 @@ llvm.func @nvvm_tcgen05_ld_32x32b_max(%addr : !llvm.ptr<6>) { // CHECK: %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 0 // CHECK: %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 1 - %data, %redval = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32>, i32 + %data, %redval = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32>, i32 - %data1, %redval1 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32>, i32 + %data1, %redval1 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32>, i32 - %data2, %redval2 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32>, i32 + %data2, %redval2 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32>, i32 - %data3, %redval3 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32>, i32 + %data3, %redval3 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32>, i32 - %data4, %redval4 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32>, i32 + %data4, %redval4 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32>, i32 - %data5, %redval5 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32>, i32 + %data5, %redval5 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32>, i32 - %data6, %redval6 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32>, i32 + %data6, %redval6 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32>, i32 - %data7, %redval7 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x f32>, f32 + %data7, %redval7 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x f32>, f32 - %data8, %redval8 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x f32>, f32 + %data8, %redval8 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x f32>, f32 - %data9, %redval9 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x f32>, f32 + %data9, %redval9 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x f32>, f32 - %data10, %redval10 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x f32>, f32 + %data10, %redval10 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x f32>, f32 - %data11, %redval11 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x f32>, f32 + %data11, %redval11 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x f32>, f32 - %data12, %redval12 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x f32>, f32 + %data12, %redval12 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x f32>, f32 - %data13, %redval13 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x f32>, f32 + %data13, %redval13 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x f32>, f32 llvm.return } @@ -177,19 +177,19 @@ llvm.func @nvvm_tcgen05_ld_32x32b_min_abs_nan(%addr : !llvm.ptr<6>) { // CHECK: %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 0 // CHECK: %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 1 - %data7, %redval7 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<2 x f32>, f32 + %data7, %redval7 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<2 x f32>, f32 - %data8, %redval8 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<4 x f32>, f32 + %data8, %redval8 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<4 x f32>, f32 - %data9, %redval9 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<8 x f32>, f32 + %data9, %redval9 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<8 x f32>, f32 - %data10, %redval10 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<16 x f32>, f32 + %data10, %redval10 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<16 x f32>, f32 - %data11, %redval11 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<32 x f32>, f32 + %data11, %redval11 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<32 x f32>, f32 - %data12, %redval12 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<64 x f32>, f32 + %data12, %redval12 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<64 x f32>, f32 - %data13, %redval13 = nvvm.tcgen05.ld.red min %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<128 x f32>, f32 + %data13, %redval13 = nvvm.tcgen05.ld.red min %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<128 x f32>, f32 llvm.return } @@ -218,19 +218,19 @@ llvm.func @nvvm_tcgen05_ld_32x32b_max_abs_nan(%addr : !llvm.ptr<6>) { // CHECK: %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 0 // CHECK: %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 1 - %data7, %redval7 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<2 x f32>, f32 + %data7, %redval7 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<2 x f32>, f32 - %data8, %redval8 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<4 x f32>, f32 + %data8, %redval8 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<4 x f32>, f32 - %data9, %redval9 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<8 x f32>, f32 + %data9, %redval9 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<8 x f32>, f32 - %data10, %redval10 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<16 x f32>, f32 + %data10, %redval10 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<16 x f32>, f32 - %data11, %redval11 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<32 x f32>, f32 + %data11, %redval11 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<32 x f32>, f32 - %data12, %redval12 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<64 x f32>, f32 + %data12, %redval12 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<64 x f32>, f32 - %data13, %redval13 = nvvm.tcgen05.ld.red max %addr { shape = #nvvm.tcgen05_ldst_shape, abs, nan} : vector<128 x f32>, f32 + %data13, %redval13 = nvvm.tcgen05.ld.red max %addr <{ shape = #nvvm.tcgen05_ldst_shape, abs, nan}> : vector<128 x f32>, f32 llvm.return } @@ -282,33 +282,33 @@ llvm.func @nvvm_tcgen05_ld_16x32bx2_min(%addr : !llvm.ptr<6>) { // CHECK %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 0 // CHECK %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 1 - %data, %redval = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32>, i32 + %data, %redval = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32>, i32 - %data1, %redval1 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32>, i32 + %data1, %redval1 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32>, i32 - %data2, %redval2 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32>, i32 + %data2, %redval2 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32>, i32 - %data3, %redval3 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32>, i32 + %data3, %redval3 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32>, i32 - %data4, %redval4 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32>, i32 + %data4, %redval4 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32>, i32 - %data5, %redval5 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32>, i32 + %data5, %redval5 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32>, i32 - %data6, %redval6 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32>, i32 + %data6, %redval6 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32>, i32 - %data7, %redval7 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x f32>, f32 + %data7, %redval7 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x f32>, f32 - %data8, %redval8 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x f32>, f32 + %data8, %redval8 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x f32>, f32 - %data9, %redval9 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x f32>, f32 + %data9, %redval9 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x f32>, f32 - %data10, %redval10 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x f32>, f32 + %data10, %redval10 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x f32>, f32 - %data11, %redval11 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x f32>, f32 + %data11, %redval11 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x f32>, f32 - %data12, %redval12 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x f32>, f32 + %data12, %redval12 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x f32>, f32 - %data13, %redval13 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x f32>, f32 + %data13, %redval13 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x f32>, f32 llvm.return } @@ -360,33 +360,33 @@ llvm.func @nvvm_tcgen05_ld_16x32bx2_max(%addr : !llvm.ptr<6>) { // CHECK %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 0 // CHECK %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 1 - %data, %redval = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32>, i32 + %data, %redval = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32>, i32 - %data1, %redval1 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32>, i32 + %data1, %redval1 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32>, i32 - %data2, %redval2 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32>, i32 + %data2, %redval2 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32>, i32 - %data3, %redval3 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32>, i32 + %data3, %redval3 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32>, i32 - %data4, %redval4 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32>, i32 + %data4, %redval4 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32>, i32 - %data5, %redval5 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32>, i32 + %data5, %redval5 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32>, i32 - %data6, %redval6 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32>, i32 + %data6, %redval6 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32>, i32 - %data7, %redval7 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x f32>, f32 + %data7, %redval7 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x f32>, f32 - %data8, %redval8 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x f32>, f32 + %data8, %redval8 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x f32>, f32 - %data9, %redval9 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x f32>, f32 + %data9, %redval9 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x f32>, f32 - %data10, %redval10 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x f32>, f32 + %data10, %redval10 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x f32>, f32 - %data11, %redval11 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x f32>, f32 + %data11, %redval11 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x f32>, f32 - %data12, %redval12 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x f32>, f32 + %data12, %redval12 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x f32>, f32 - %data13, %redval13 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x f32>, f32 + %data13, %redval13 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x f32>, f32 llvm.return } @@ -416,19 +416,19 @@ llvm.func @nvvm_tcgen05_ld_16x32bx2_min_nan_abs(%addr : !llvm.ptr<6>) { // CHECK %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 0 // CHECK %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 1 - %data7, %redval7 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<2 x f32>, f32 + %data7, %redval7 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<2 x f32>, f32 - %data8, %redval8 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<4 x f32>, f32 + %data8, %redval8 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<4 x f32>, f32 - %data9, %redval9 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<8 x f32>, f32 + %data9, %redval9 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<8 x f32>, f32 - %data10, %redval10 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<16 x f32>, f32 + %data10, %redval10 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<16 x f32>, f32 - %data11, %redval11 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<32 x f32>, f32 + %data11, %redval11 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<32 x f32>, f32 - %data12, %redval12 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<64 x f32>, f32 + %data12, %redval12 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<64 x f32>, f32 - %data13, %redval13 = nvvm.tcgen05.ld.red min %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<128 x f32>, f32 + %data13, %redval13 = nvvm.tcgen05.ld.red min %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<128 x f32>, f32 llvm.return } @@ -458,18 +458,18 @@ llvm.func @nvvm_tcgen05_ld_16x32bx2_max_nan_abs(%addr : !llvm.ptr<6>) { // CHECK %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 0 // CHECK %{{.*}} = extractvalue { <128 x float>, float } %{{.*}}, 1 - %data7, %redval7 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<2 x f32>, f32 + %data7, %redval7 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<2 x f32>, f32 - %data8, %redval8 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<4 x f32>, f32 + %data8, %redval8 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<4 x f32>, f32 - %data9, %redval9 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<8 x f32>, f32 + %data9, %redval9 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<8 x f32>, f32 - %data10, %redval10 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<16 x f32>, f32 + %data10, %redval10 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<16 x f32>, f32 - %data11, %redval11 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<32 x f32>, f32 + %data11, %redval11 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<32 x f32>, f32 - %data12, %redval12 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<64 x f32>, f32 + %data12, %redval12 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<64 x f32>, f32 - %data13, %redval13 = nvvm.tcgen05.ld.red max %addr, %offset { shape = #nvvm.tcgen05_ldst_shape, nan, abs} : vector<128 x f32>, f32 + %data13, %redval13 = nvvm.tcgen05.ld.red max %addr, %offset <{ shape = #nvvm.tcgen05_ldst_shape, nan, abs}> : vector<128 x f32>, f32 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld.mlir index b1266b0e8151d..3f052614f62d4 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld.mlir @@ -4,28 +4,28 @@ llvm.func @nvvm_tcgen05_ld_16x64b(%tmemAddr : !llvm.ptr<6>) { // CHECK: call i32 @llvm.nvvm.tcgen05.ld.16x64b.x1(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv1 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : i32 + %ldv1 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : i32 // CHECK: call <2 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x2(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv2 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32> + %ldv2 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32> // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x4(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv4 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x8(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv8 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x16(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv16= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x32(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv32= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x64(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv64= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x128(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv128= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } @@ -34,28 +34,28 @@ llvm.func @nvvm_tcgen05_ld_16x64b(%tmemAddr : !llvm.ptr<6>) { llvm.func @nvvm_tcgen05_ld_16x64b_pack(%tmemAddr : !llvm.ptr<6>) { // CHECK: call i32 @llvm.nvvm.tcgen05.ld.16x64b.x1(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv1 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : i32 + %ldv1 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : i32 // CHECK: call <2 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x2(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv2 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32> + %ldv2 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32> // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x4(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv4 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x8(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv8 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x16(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv16= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x32(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv32= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x64(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv64= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.16x64b.x128(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv128= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } @@ -64,25 +64,25 @@ llvm.func @nvvm_tcgen05_ld_16x64b_pack(%tmemAddr : !llvm.ptr<6>) { llvm.func @nvvm_tcgen05_ld_16x128b(%tmemAddr : !llvm.ptr<6>) { // CHECK: call <2 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x1(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv2 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32> + %ldv2 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32> // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x2(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv4 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x4(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv8 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x8(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv16= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x16(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv32= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x32(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv64= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x64(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv128= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } @@ -91,25 +91,25 @@ llvm.func @nvvm_tcgen05_ld_16x128b(%tmemAddr : !llvm.ptr<6>) { llvm.func @nvvm_tcgen05_ld_16x128b_pack(%tmemAddr : !llvm.ptr<6>) { // CHECK: call <2 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x1(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv2 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32> + %ldv2 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32> // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x2(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv4 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x4(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv8 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x8(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv16= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x16(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv32= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x32(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv64= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.16x128b.x64(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv128= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } @@ -118,22 +118,22 @@ llvm.func @nvvm_tcgen05_ld_16x128b_pack(%tmemAddr : !llvm.ptr<6>) { llvm.func @nvvm_tcgen05_ld_16x256b(%tmemAddr : !llvm.ptr<6>) { // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x1(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv4 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x2(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv8 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x4(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv16= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x8(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv32= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x16(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv64= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x32(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv128= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } @@ -142,22 +142,22 @@ llvm.func @nvvm_tcgen05_ld_16x256b(%tmemAddr : !llvm.ptr<6>) { llvm.func @nvvm_tcgen05_ld_16x256b_pack(%tmemAddr : !llvm.ptr<6>) { // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x1(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv4 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x2(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv8 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x4(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv16= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x8(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv32= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x16(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv64= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.16x256b.x32(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv128= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } @@ -166,28 +166,28 @@ llvm.func @nvvm_tcgen05_ld_16x256b_pack(%tmemAddr : !llvm.ptr<6>) { llvm.func @nvvm_tcgen05_ld_32x32b(%tmemAddr : !llvm.ptr<6>) { // CHECK: call i32 @llvm.nvvm.tcgen05.ld.32x32b.x1(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv1 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : i32 + %ldv1 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : i32 // CHECK: call <2 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x2(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv2 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32> + %ldv2 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32> // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x4(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv4 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x8(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv8 = nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x16(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv16= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x32(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv32= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x64(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv64= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x128(ptr addrspace(6) {{%[0-9]+}}, i1 false) - %ldv128= nvvm.tcgen05.ld %tmemAddr { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } @@ -196,28 +196,28 @@ llvm.func @nvvm_tcgen05_ld_32x32b(%tmemAddr : !llvm.ptr<6>) { llvm.func @nvvm_tcgen05_ld_32x32b_pack(%tmemAddr : !llvm.ptr<6>) { // CHECK: call i32 @llvm.nvvm.tcgen05.ld.32x32b.x1(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv1 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : i32 + %ldv1 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : i32 // CHECK: call <2 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x2(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv2 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32> + %ldv2 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32> // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x4(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv4 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x8(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv8 = nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x16(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv16= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x32(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv32= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x64(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv64= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.32x32b.x128(ptr addrspace(6) {{%[0-9]+}}, i1 true) - %ldv128= nvvm.tcgen05.ld %tmemAddr pack { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } @@ -228,28 +228,28 @@ llvm.func @nvvm_tcgen05_ld_16x32bx2(%tmemAddr : !llvm.ptr<6>) { %halfSplitOffset = llvm.mlir.constant(2:i64) : i64 // CHECK: call i32 @llvm.nvvm.tcgen05.ld.16x32bx2.x1(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 false) - %ldv1 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset { shape = #nvvm.tcgen05_ldst_shape} : i32 + %ldv1 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset <{ shape = #nvvm.tcgen05_ldst_shape}> : i32 // CHECK: call <2 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x2(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 false) - %ldv2 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32> + %ldv2 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32> // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x4(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 false) - %ldv4 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x8(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 false) - %ldv8 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x16(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 false) - %ldv16= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x32(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 false) - %ldv32= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x64(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 false) - %ldv64= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x128(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 false) - %ldv128= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } @@ -260,28 +260,28 @@ llvm.func @nvvm_tcgen05_ld_16x32bx2_pack(%tmemAddr : !llvm.ptr<6>) { %halfSplitOffset = llvm.mlir.constant(2:i64) : i64 // CHECK: call i32 @llvm.nvvm.tcgen05.ld.16x32bx2.x1(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 true) - %ldv1 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack { shape = #nvvm.tcgen05_ldst_shape} : i32 + %ldv1 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack <{ shape = #nvvm.tcgen05_ldst_shape}> : i32 // CHECK: call <2 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x2(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 true) - %ldv2 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack { shape = #nvvm.tcgen05_ldst_shape} : vector<2 x i32> + %ldv2 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<2 x i32> // CHECK: call <4 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x4(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 true) - %ldv4 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack { shape = #nvvm.tcgen05_ldst_shape} : vector<4 x i32> + %ldv4 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<4 x i32> // CHECK: call <8 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x8(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 true) - %ldv8 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack { shape = #nvvm.tcgen05_ldst_shape} : vector<8 x i32> + %ldv8 = nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<8 x i32> // CHECK: call <16 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x16(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 true) - %ldv16= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack { shape = #nvvm.tcgen05_ldst_shape} : vector<16 x i32> + %ldv16= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<16 x i32> // CHECK: call <32 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x32(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 true) - %ldv32= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack { shape = #nvvm.tcgen05_ldst_shape} : vector<32 x i32> + %ldv32= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<32 x i32> // CHECK: call <64 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x64(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 true) - %ldv64= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack { shape = #nvvm.tcgen05_ldst_shape} : vector<64 x i32> + %ldv64= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<64 x i32> // CHECK: call <128 x i32> @llvm.nvvm.tcgen05.ld.16x32bx2.x128(ptr addrspace(6) {{%[0-9]+}}, i64 2, i1 true) - %ldv128= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack { shape = #nvvm.tcgen05_ldst_shape} : vector<128 x i32> + %ldv128= nvvm.tcgen05.ld %tmemAddr, %halfSplitOffset pack <{ shape = #nvvm.tcgen05_ldst_shape}> : vector<128 x i32> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-shared.mlir index 99e3fe45a51b7..2fb30c683c09e 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-shared.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-shared.mlir @@ -6,35 +6,35 @@ llvm.func @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, % // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -44,35 +44,35 @@ llvm.func @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, % // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -82,35 +82,35 @@ llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_de // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -120,35 +120,35 @@ llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_de // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -158,35 +158,35 @@ llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, % // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -196,35 +196,35 @@ llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, % // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-tensor.mlir index 5138160201103..c7dc8fe59081b 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-tensor.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-tensor.mlir @@ -6,35 +6,35 @@ llvm.func @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, % // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -44,35 +44,35 @@ llvm.func @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, % // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -82,35 +82,35 @@ llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_tm // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -120,35 +120,35 @@ llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_tm // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -158,35 +158,35 @@ llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, % // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -196,35 +196,35 @@ llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, % // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-invalid.mlir index 3dd0d7a6c9aaa..c4a2d73273acd 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-invalid.mlir @@ -4,7 +4,7 @@ llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLanev4: vector<4 x i32>, %disableOutputLanev8: vector<8 x i32>) { // expected-error @below {{Disable Output Lane of length 8 is incompatible with CtaGroupAttr}} nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLanev8 - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) llvm.return } @@ -14,7 +14,7 @@ llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLanev4: vector<4 x i32>, %disableOutputLanev8: vector<8 x i32>) { // expected-error @below {{Disable Output Lane of length 8 is incompatible with CtaGroupAttr}} nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLanev8 - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) llvm.return } @@ -24,7 +24,7 @@ llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a llvm.func @nvvm_tcgen05_mma_shared_ashift(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { // expected-error @below {{A-shift can be applied only when matrix A is in tensor memory}} nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, i64, i64, i32, i1) llvm.return } @@ -34,7 +34,7 @@ llvm.func @nvvm_tcgen05_mma_shared_ashift(%d_tmem : !llvm.ptr<6>, %a_desc: i64, llvm.func @nvvm_tcgen05_mma_ashift(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { // expected-error @below {{Cannot use collector buffer operation fill or use with ashift}} nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) llvm.return } @@ -44,7 +44,7 @@ llvm.func @nvvm_tcgen05_mma_ashift(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6> llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_default(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scalea: !llvm.ptr<6>, %scaleb: !llvm.ptr<6>) { // expected-error @below {{mxf4nvf4 requires block scale attribute}} nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scalea, %scaleb - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> {aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -54,7 +54,7 @@ llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_default(%d_tmem : !llvm.ptr<6>, llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_default(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scalea: !llvm.ptr<6>, %scaleb: !llvm.ptr<6>) { // expected-error @below {{mxf4 kind does not support block16 attribute}} nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scalea, %scaleb - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, ashift, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> {ashift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -64,7 +64,7 @@ llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_default(%d_tmem : !llvm.ptr<6>, %a_ llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLanev4: vector<4 x i32>, %disableOutputLanev8: vector<8 x i32>, %spmetadata: !llvm.ptr<6>) { // expected-error @below {{Disable Output Lane of length 8 is incompatible with CtaGroupAttr}} nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLanev8 - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) llvm.return } @@ -74,7 +74,7 @@ llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLanev4: vector<4 x i32>, %disableOutputLanev8: vector<8 x i32>, %spmetadata: !llvm.ptr<6>) { // expected-error @below {{Disable Output Lane of length 8 is incompatible with CtaGroupAttr}} nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLanev8 - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) llvm.return } @@ -84,7 +84,7 @@ llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, llvm.func @nvvm_tcgen05_sp_mma_shared_ashift(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { // expected-error @below {{A-shift can be applied only when matrix A is in tensor memory}} nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -94,7 +94,7 @@ llvm.func @nvvm_tcgen05_sp_mma_shared_ashift(%d_tmem : !llvm.ptr<6>, %a_desc: i6 llvm.func @nvvm_tcgen05_mma_sp_ashift(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { // expected-error @below {{Cannot use collector buffer operation fill or use with ashift}} nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -104,7 +104,7 @@ llvm.func @nvvm_tcgen05_mma_sp_ashift(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_default(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scalea: !llvm.ptr<6>, %scaleb: !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { // expected-error @below {{mxf4nvf4 requires block scale attribute}} nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scalea, %scaleb - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> {aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -114,7 +114,7 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_default(%d_tmem : !llvm.ptr< llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_default(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scalea: !llvm.ptr<6>, %scaleb: !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { // expected-error @below {{mxf4 kind does not support block16 attribute}} nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scalea, %scaleb - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, ashift, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> {ashift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -124,7 +124,7 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_default(%d_tmem : !llvm.ptr<6>, // CHECK-LABEL: @nvvm_tcgen05_mma_invalid_kind_mxf8f6f4 llvm.func @nvvm_tcgen05_mma_invalid_kind_mxf8f6f4(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { // expected-error @below {{attribute 'kind' failed to satisfy constraint: tcgen05 MMA Supported Types whose value is one of {f16, tf32, f8f6f4, i8}}} - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) llvm.return } @@ -134,7 +134,7 @@ llvm.func @nvvm_tcgen05_mma_invalid_kind_mxf8f6f4(%d_tmem : !llvm.ptr<6>, %a_des // CHECK-LABEL: @nvvm_tcgen05_mma_sp_invalid_kind_mxf4 llvm.func @nvvm_tcgen05_mma_sp_invalid_kind_mxf4(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { // expected-error @below {{attribute 'kind' failed to satisfy constraint: tcgen05 MMA Supported Types whose value is one of {f16, tf32, f8f6f4, i8}}} - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -144,7 +144,7 @@ llvm.func @nvvm_tcgen05_mma_sp_invalid_kind_mxf4(%d_tmem : !llvm.ptr<6>, %a_desc // CHECK-LABEL: @nvvm_tcgen05_mma_ws_invalid_kind_mxf4nvf4 llvm.func @nvvm_tcgen05_mma_ws_invalid_kind_mxf4nvf4(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { // expected-error @below {{attribute 'kind' failed to satisfy constraint: tcgen05 MMA Supported Types whose value is one of {f16, tf32, f8f6f4, i8}}} - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d <{kind = #nvvm.tcgen05_mma_kind}> {ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) llvm.return } @@ -154,7 +154,7 @@ llvm.func @nvvm_tcgen05_mma_ws_invalid_kind_mxf4nvf4(%d_tmem : !llvm.ptr<6>, %a_ // CHECK-LABEL: @nvvm_tcgen05_mma_ws_sp_invalid_kind_mxf8f6f4 llvm.func @nvvm_tcgen05_mma_ws_sp_invalid_kind_mxf8f6f4(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { // expected-error @below {{attribute 'kind' failed to satisfy constraint: tcgen05 MMA Supported Types whose value is one of {f16, tf32, f8f6f4, i8}}} - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata <{kind = #nvvm.tcgen05_mma_kind}> {ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -164,7 +164,7 @@ llvm.func @nvvm_tcgen05_mma_ws_sp_invalid_kind_mxf8f6f4(%d_tmem : !llvm.ptr<6>, // CHECK-LABEL: @nvvm_tcgen05_mma_block_scale_invalid_kind_f16 llvm.func @nvvm_tcgen05_mma_block_scale_invalid_kind_f16(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b: !llvm.ptr<6>) { // expected-error @below {{attribute 'kind' failed to satisfy constraint: tcgen05 MMA Supported Types whose value is one of {mxf8f6f4, mxf4, mxf4nvf4}}} - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) + nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -174,6 +174,6 @@ llvm.func @nvvm_tcgen05_mma_block_scale_invalid_kind_f16(%d_tmem : !llvm.ptr<6>, // CHECK-LABEL: @nvvm_tcgen05_mma_sp_block_scale_invalid_kind_tf32 llvm.func @nvvm_tcgen05_mma_sp_block_scale_invalid_kind_tf32(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>, %scale_a: !llvm.ptr<6>, %scale_b: !llvm.ptr<6>) { // expected-error @below {{attribute 'kind' failed to satisfy constraint: tcgen05 MMA Supported Types whose value is one of {mxf8f6f4, mxf4, mxf4nvf4}}} - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-shared.mlir index 286df36730e77..9f909ac79daf2 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-shared.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-shared.mlir @@ -5,67 +5,67 @@ llvm.func @nvvm_tcgen05_mma_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) llvm.return } @@ -75,67 +75,67 @@ llvm.func @nvvm_tcgen05_mma_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) llvm.return } @@ -148,35 +148,35 @@ llvm.func @nvvm_tcgen05_mma_scale_d_imm_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) llvm.return } @@ -188,35 +188,35 @@ llvm.func @nvvm_tcgen05_mma_scale_d_imm_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) llvm.return } @@ -226,67 +226,67 @@ llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) llvm.return } @@ -296,67 +296,67 @@ llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) llvm.return } @@ -368,35 +368,35 @@ llvm.func @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_1(%d_tmem : !llv // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) llvm.return } @@ -408,35 +408,35 @@ llvm.func @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_2(%d_tmem : !llv // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-shared.mlir index a683ec96d11a9..dab1424bec424 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-shared.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-shared.mlir @@ -6,35 +6,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_1(%d_tmem : !llvm.ptr<6> // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -44,35 +44,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_2(%d_tmem : !llvm.ptr<6> // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -82,35 +82,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -120,35 +120,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -158,35 +158,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6> // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -196,35 +196,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6> // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-tensor.mlir index a5db4f2bbac4b..0fe81524fa9ea 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-tensor.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-tensor.mlir @@ -6,35 +6,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_1(%d_tmem : !llvm.ptr<6> // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -44,35 +44,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_2(%d_tmem : !llvm.ptr<6> // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -82,35 +82,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -120,35 +120,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -158,35 +158,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6> // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } @@ -196,35 +196,35 @@ llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6> // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir index 96044cf669d63..8f9fd92205228 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir @@ -5,67 +5,67 @@ llvm.func @nvvm_tcgen05_mma_sp_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %ades // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -75,67 +75,67 @@ llvm.func @nvvm_tcgen05_mma_sp_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %ades // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -148,35 +148,35 @@ llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) llvm.return } @@ -188,35 +188,35 @@ llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) llvm.return } @@ -226,67 +226,67 @@ llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) llvm.return } @@ -296,67 +296,67 @@ llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) llvm.return } @@ -368,35 +368,35 @@ llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_1(%d_tmem : ! // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) llvm.return } @@ -408,35 +408,35 @@ llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_2(%d_tmem : ! // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir index 709beb0508bb8..9a52f41de48a6 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir @@ -5,99 +5,99 @@ llvm.func @nvvm_tcgen05_mma_sp_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr< // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -107,99 +107,99 @@ llvm.func @nvvm_tcgen05_mma_sp_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr< // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -212,51 +212,51 @@ llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) llvm.return } @@ -268,51 +268,51 @@ llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) llvm.return } @@ -322,99 +322,99 @@ llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) llvm.return } @@ -424,99 +424,99 @@ llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) llvm.return } @@ -528,51 +528,51 @@ llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_1(%d_tmem : ! // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) llvm.return } @@ -584,51 +584,51 @@ llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_2(%d_tmem : ! // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-tensor.mlir index 798e311778beb..67a547398db0a 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-tensor.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-tensor.mlir @@ -5,99 +5,99 @@ llvm.func @nvvm_tcgen05_mma_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=discard */ i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 1, /* collector=discard */ i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 1, /* collector=discard */ i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 1, /* collector=discard */ i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=lastuse */ i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 1, /* collector=lastuse */ i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 1, /* collector=lastuse */ i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 1, /* collector=lastuse */ i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=fill */ i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 1, /* collector=fill */ i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 1, /* collector=fill */ i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 1, /* collector=fill */ i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=use */ i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 1, /* collector=use */ i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 1, /* collector=use */ i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 1, /* collector=use */ i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) llvm.return } @@ -107,99 +107,99 @@ llvm.func @nvvm_tcgen05_mma_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 2, /* collector=discard */ i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 2, /* collector=discard */ i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=discard */ i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 2, /* collector=discard */ i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 2, /* collector=lastuse */ i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 2, /* collector=lastuse */ i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=lastuse */ i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 2, /* collector=lastuse */ i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 2, /* collector=fill */ i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 2, /* collector=fill */ i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=fill */ i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 2, /* collector=fill */ i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 2, /* collector=use */ i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 2, /* collector=use */ i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=use */ i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 2, /* collector=use */ i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) llvm.return } @@ -211,51 +211,51 @@ llvm.func @nvvm_tcgen05_mma_scale_d_imm_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: ! // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) llvm.return } @@ -267,51 +267,51 @@ llvm.func @nvvm_tcgen05_mma_scale_d_imm_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: ! // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) llvm.return } @@ -321,99 +321,99 @@ llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) llvm.return } @@ -423,99 +423,99 @@ llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) llvm.return } @@ -527,51 +527,51 @@ llvm.func @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_1(%d_tmem : !llv // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) llvm.return } @@ -583,51 +583,51 @@ llvm.func @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_2(%d_tmem : !llv // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) + <{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir index 5f1aeb05888bd..3aa567ff724e6 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir @@ -5,63 +5,63 @@ llvm.func @nvvm_tcgen05_mma_ws(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i6 // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1) llvm.return } @@ -71,63 +71,63 @@ llvm.func @nvvm_tcgen05_mma_ws_zero_col_mask(%d_tmem : !llvm.ptr<6>, %a_desc: i6 // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, i64) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir index e390e350090ad..17b9c39ccd05d 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir @@ -5,63 +5,63 @@ llvm.func @nvvm_tcgen05_mma_ws_sp(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -71,63 +71,63 @@ llvm.func @nvvm_tcgen05_mma_ws_sp_zero_col_mask(%d_tmem : !llvm.ptr<6>, %a_desc: // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir index f7ce5484803e9..996188bfba636 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir @@ -5,63 +5,63 @@ llvm.func @nvvm_tcgen05_mma_ws_sp(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) llvm.return } @@ -71,63 +71,63 @@ llvm.func @nvvm_tcgen05_mma_ws_sp_zero_col_mask(%d_tmem : !llvm.ptr<6>, %a_tmem: // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 0, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir index cecbb3fbd90af..e194d48af2ac2 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir @@ -5,63 +5,63 @@ llvm.func @nvvm_tcgen05_mma_ws(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %b // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) llvm.return } @@ -71,63 +71,63 @@ llvm.func @nvvm_tcgen05_mma_ws_zero_col_mask(%d_tmem : !llvm.ptr<6>, %a_tmem: !l // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 0, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 0) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + <{kind = #nvvm.tcgen05_mma_kind, + collectorBBuffer = #nvvm.tcgen05_mma_collectorb}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 1) nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, + <{kind = #nvvm.tcgen05_mma_kind, collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) + collectorOp = #nvvm.tcgen05_mma_collectorop}> : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-shift.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-shift.mlir index b8c33516b9135..1e88d11144c37 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-shift.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-shift.mlir @@ -6,6 +6,6 @@ llvm.func @llvm_nvvm_tcgen05_shift(%taddr : !llvm.ptr<6>) { nvvm.tcgen05.shift %taddr : !llvm.ptr<6> // CHECK: call void @llvm.nvvm.tcgen05.shift.down.cg2(ptr addrspace(6) %{{.*}}) - nvvm.tcgen05.shift %taddr {group = #nvvm.cta_group} : !llvm.ptr<6> + nvvm.tcgen05.shift %taddr <{group = #nvvm.cta_group}> : !llvm.ptr<6> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-st.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-st.mlir index 119746133625d..864bec5c2d20d 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-st.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-st.mlir @@ -13,28 +13,28 @@ llvm.func @nvvm_tcgen05_ld_16x64b( %stv128 : vector<128xi32>) { // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x1(ptr addrspace(6) {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv1 { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : i32 + nvvm.tcgen05.st %tmemAddr, %stv1 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : i32 // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x2(ptr addrspace(6) {{%[0-9]+}}, <2 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv2 { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<2xi32> + nvvm.tcgen05.st %tmemAddr, %stv2 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<2xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x4(ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv4 { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x8(ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv8 { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x16(ptr addrspace(6) {{%[0-9]+}}, <16 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv16 { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x32(ptr addrspace(6) {{%[0-9]+}}, <32 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv32 { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x64(ptr addrspace(6) {{%[0-9]+}}, <64 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv64 { shape = #nvvm.tcgen05_ldst_shape, num=64:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 64 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x128(ptr addrspace(6) {{%[0-9]+}}, <128 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv128 { shape = #nvvm.tcgen05_ldst_shape, num=128:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 128 : i32} : vector<128xi32> llvm.return } @@ -52,28 +52,28 @@ llvm.func @nvvm_tcgen05_ld_16x64b_pack( %stv128 : vector<128xi32>) { // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x1(ptr addrspace(6) {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv1 unpack { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : i32 + nvvm.tcgen05.st %tmemAddr, %stv1 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : i32 // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x2(ptr addrspace(6) {{%[0-9]+}}, <2 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv2 unpack { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<2xi32> + nvvm.tcgen05.st %tmemAddr, %stv2 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<2xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x4(ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv4 unpack { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x8(ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv8 unpack { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x16(ptr addrspace(6) {{%[0-9]+}}, <16 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv16 unpack { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x32(ptr addrspace(6) {{%[0-9]+}}, <32 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv32 unpack { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x64(ptr addrspace(6) {{%[0-9]+}}, <64 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv64 unpack { shape = #nvvm.tcgen05_ldst_shape, num=64:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 64 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x64b.x128(ptr addrspace(6) {{%[0-9]+}}, <128 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv128 unpack { shape = #nvvm.tcgen05_ldst_shape, num=128:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 128 : i32} : vector<128xi32> llvm.return } @@ -91,25 +91,25 @@ llvm.func @nvvm_tcgen05_ld_16x128b( %stv128 : vector<128xi32>) { // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x1(ptr addrspace(6) {{%[0-9]+}}, <2 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv2 { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : vector<2xi32> + nvvm.tcgen05.st %tmemAddr, %stv2 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : vector<2xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x2(ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv4 { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x4(ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv8 { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x8(ptr addrspace(6) {{%[0-9]+}}, <16 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv16 { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x16(ptr addrspace(6) {{%[0-9]+}}, <32 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv32 { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x32(ptr addrspace(6) {{%[0-9]+}}, <64 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv64 { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x64(ptr addrspace(6) {{%[0-9]+}}, <128 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv128 { shape = #nvvm.tcgen05_ldst_shape, num=64:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 64 : i32} : vector<128xi32> llvm.return } @@ -127,25 +127,25 @@ llvm.func @nvvm_tcgen05_ld_16x128b_pack( %stv128 : vector<128xi32>) { // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x1(ptr addrspace(6) {{%[0-9]+}}, <2 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv2 unpack { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : vector<2xi32> + nvvm.tcgen05.st %tmemAddr, %stv2 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : vector<2xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x2(ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv4 unpack { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x4(ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv8 unpack { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x8(ptr addrspace(6) {{%[0-9]+}}, <16 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv16 unpack { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x16(ptr addrspace(6) {{%[0-9]+}}, <32 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv32 unpack { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x32(ptr addrspace(6) {{%[0-9]+}}, <64 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv64 unpack { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x128b.x64(ptr addrspace(6) {{%[0-9]+}}, <128 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv128 unpack { shape = #nvvm.tcgen05_ldst_shape, num=64:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 64 : i32} : vector<128xi32> llvm.return } @@ -163,22 +163,22 @@ llvm.func @nvvm_tcgen05_ld_16x256b( %stv128 : vector<128xi32>) { // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x1(ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv4 { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x2(ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv8 { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x4(ptr addrspace(6) {{%[0-9]+}}, <16 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv16 { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x8(ptr addrspace(6) {{%[0-9]+}}, <32 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv32 { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x16(ptr addrspace(6) {{%[0-9]+}}, <64 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv64 { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x32(ptr addrspace(6) {{%[0-9]+}}, <128 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv128 { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<128xi32> llvm.return } @@ -196,22 +196,22 @@ llvm.func @nvvm_tcgen05_ld_16x256b_pack( %stv128 : vector<128xi32>) { // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x1(ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv4 unpack { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x2(ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv8 unpack { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x4(ptr addrspace(6) {{%[0-9]+}}, <16 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv16 unpack { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x8(ptr addrspace(6) {{%[0-9]+}}, <32 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv32 unpack { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x16(ptr addrspace(6) {{%[0-9]+}}, <64 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv64 unpack { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x256b.x32(ptr addrspace(6) {{%[0-9]+}}, <128 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv128 unpack { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<128xi32> llvm.return } @@ -229,28 +229,28 @@ llvm.func @nvvm_tcgen05_ld_32x32b( %stv128 : vector<128xi32>) { // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x1(ptr addrspace(6) {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv1 { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : i32 + nvvm.tcgen05.st %tmemAddr, %stv1 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : i32 // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x2(ptr addrspace(6) {{%[0-9]+}}, <2 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv2 { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<2xi32> + nvvm.tcgen05.st %tmemAddr, %stv2 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<2xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x4(ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv4 { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x8(ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv8 { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x16(ptr addrspace(6) {{%[0-9]+}}, <16 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv16 { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x32(ptr addrspace(6) {{%[0-9]+}}, <32 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv32 { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x64(ptr addrspace(6) {{%[0-9]+}}, <64 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv64 { shape = #nvvm.tcgen05_ldst_shape, num=64:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 64 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x128(ptr addrspace(6) {{%[0-9]+}}, <128 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv128 { shape = #nvvm.tcgen05_ldst_shape, num=128:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128 <{shape = #nvvm.tcgen05_ldst_shape}> {num = 128 : i32} : vector<128xi32> llvm.return } @@ -268,28 +268,28 @@ llvm.func @nvvm_tcgen05_ld_32x32b_pack( %stv128 : vector<128xi32>) { // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x1(ptr addrspace(6) {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv1 unpack { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : i32 + nvvm.tcgen05.st %tmemAddr, %stv1 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : i32 // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x2(ptr addrspace(6) {{%[0-9]+}}, <2 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv2 unpack { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<2xi32> + nvvm.tcgen05.st %tmemAddr, %stv2 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<2xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x4(ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv4 unpack { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x8(ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv8 unpack { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x16(ptr addrspace(6) {{%[0-9]+}}, <16 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv16 unpack { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x32(ptr addrspace(6) {{%[0-9]+}}, <32 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv32 unpack { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x64(ptr addrspace(6) {{%[0-9]+}}, <64 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv64 unpack { shape = #nvvm.tcgen05_ldst_shape, num=64:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 64 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.32x32b.x128(ptr addrspace(6) {{%[0-9]+}}, <128 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv128 unpack { shape = #nvvm.tcgen05_ldst_shape, num=128:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128 unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 128 : i32} : vector<128xi32> llvm.return } @@ -309,28 +309,28 @@ llvm.func @nvvm_tcgen05_ld_16x32bx2( %offset = llvm.mlir.constant(2:i64) : i64 // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x1(ptr addrspace(6) {{%[0-9]+}}, i64 2, i32 {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv1, %offset { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : i32 + nvvm.tcgen05.st %tmemAddr, %stv1, %offset <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : i32 // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x2(ptr addrspace(6) {{%[0-9]+}}, i64 2, <2 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv2, %offset { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<2xi32> + nvvm.tcgen05.st %tmemAddr, %stv2, %offset <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<2xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x4(ptr addrspace(6) {{%[0-9]+}}, i64 2, <4 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv4, %offset { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4, %offset <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x8(ptr addrspace(6) {{%[0-9]+}}, i64 2, <8 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv8, %offset { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8, %offset <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x16(ptr addrspace(6) {{%[0-9]+}}, i64 2, <16 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv16, %offset { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16, %offset <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x32(ptr addrspace(6) {{%[0-9]+}}, i64 2, <32 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv32, %offset { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32, %offset <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x64(ptr addrspace(6) {{%[0-9]+}}, i64 2, <64 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv64, %offset { shape = #nvvm.tcgen05_ldst_shape, num=64:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64, %offset <{shape = #nvvm.tcgen05_ldst_shape}> {num = 64 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x128(ptr addrspace(6) {{%[0-9]+}}, i64 2, <128 x i32> {{%[0-9]+}}, i1 false) - nvvm.tcgen05.st %tmemAddr, %stv128, %offset { shape = #nvvm.tcgen05_ldst_shape, num=128:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128, %offset <{shape = #nvvm.tcgen05_ldst_shape}> {num = 128 : i32} : vector<128xi32> llvm.return } @@ -350,28 +350,28 @@ llvm.func @nvvm_tcgen05_ld_16x32bx2_pack( %offset = llvm.mlir.constant(2:i64) : i64 // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x1(ptr addrspace(6) {{%[0-9]+}}, i64 2, i32 {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv1, %offset unpack { shape = #nvvm.tcgen05_ldst_shape, num=1:i32 } : i32 + nvvm.tcgen05.st %tmemAddr, %stv1, %offset unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 1 : i32} : i32 // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x2(ptr addrspace(6) {{%[0-9]+}}, i64 2, <2 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv2, %offset unpack { shape = #nvvm.tcgen05_ldst_shape, num=2:i32 } : vector<2xi32> + nvvm.tcgen05.st %tmemAddr, %stv2, %offset unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 2 : i32} : vector<2xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x4(ptr addrspace(6) {{%[0-9]+}}, i64 2, <4 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv4, %offset unpack { shape = #nvvm.tcgen05_ldst_shape, num=4:i32 } : vector<4xi32> + nvvm.tcgen05.st %tmemAddr, %stv4, %offset unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 4 : i32} : vector<4xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x8(ptr addrspace(6) {{%[0-9]+}}, i64 2, <8 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv8, %offset unpack { shape = #nvvm.tcgen05_ldst_shape, num=8:i32 } : vector<8xi32> + nvvm.tcgen05.st %tmemAddr, %stv8, %offset unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 8 : i32} : vector<8xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x16(ptr addrspace(6) {{%[0-9]+}}, i64 2, <16 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv16, %offset unpack { shape = #nvvm.tcgen05_ldst_shape, num=16:i32 } : vector<16xi32> + nvvm.tcgen05.st %tmemAddr, %stv16, %offset unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 16 : i32} : vector<16xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x32(ptr addrspace(6) {{%[0-9]+}}, i64 2, <32 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv32, %offset unpack { shape = #nvvm.tcgen05_ldst_shape, num=32:i32 } : vector<32xi32> + nvvm.tcgen05.st %tmemAddr, %stv32, %offset unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 32 : i32} : vector<32xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x64(ptr addrspace(6) {{%[0-9]+}}, i64 2, <64 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv64, %offset unpack { shape = #nvvm.tcgen05_ldst_shape, num=64:i32 } : vector<64xi32> + nvvm.tcgen05.st %tmemAddr, %stv64, %offset unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 64 : i32} : vector<64xi32> // CHECK: call void @llvm.nvvm.tcgen05.st.16x32bx2.x128(ptr addrspace(6) {{%[0-9]+}}, i64 2, <128 x i32> {{%[0-9]+}}, i1 true) - nvvm.tcgen05.st %tmemAddr, %stv128, %offset unpack { shape = #nvvm.tcgen05_ldst_shape, num=128:i32 } : vector<128xi32> + nvvm.tcgen05.st %tmemAddr, %stv128, %offset unpack <{shape = #nvvm.tcgen05_ldst_shape}> {num = 128 : i32} : vector<128xi32> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_load_cluster_im2col.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_load_cluster_im2col.mlir index 2fb98d3c1215e..2a1f1aafd563a 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_load_cluster_im2col.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_load_cluster_im2col.mlir @@ -16,20 +16,20 @@ llvm.func @tma_load_3d_im2col(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llvm. // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.3d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i16 %6, i16 %7, i64 %8, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -49,20 +49,20 @@ llvm.func @tma_load_4d_im2col(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llvm. // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.4d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i16 %7, i16 %8, i16 0, i64 0, i1 false, i1 false, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] multicast_mask = %mask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -82,20 +82,20 @@ llvm.func @tma_load_5d_im2col(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llvm. // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.5d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i16 %8, i16 %9, i16 %10, i16 0, i64 0, i1 false, i1 false, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] multicast_mask = %mask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -115,20 +115,20 @@ llvm.func @tma_load_3d_im2col_w(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.w.3d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i16 %6, i16 %7, i16 %8, i64 %9, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -148,20 +148,20 @@ llvm.func @tma_load_4d_im2col_w(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.w.4d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i16 %7, i16 %8, i16 %9, i64 %10, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -181,20 +181,20 @@ llvm.func @tma_load_5d_im2col_w(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.w.5d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i16 %8, i16 %9, i16 %10, i64 %11, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -214,20 +214,20 @@ llvm.func @tma_load_3d_im2col_w_128(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.w.128.3d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i16 %6, i16 %7, i16 %8, i64 %9, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -247,20 +247,20 @@ llvm.func @tma_load_4d_im2col_w_128(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.w.128.4d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i16 %7, i16 %8, i16 %9, i64 %10, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -280,19 +280,19 @@ llvm.func @tma_load_5d_im2col_w_128(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.w.128.5d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i16 %8, i16 %9, i16 %10, i64 %11, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_load_cluster_tile.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_load_cluster_tile.mlir index de0b929e6db72..b135bf49c6298 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_load_cluster_tile.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_load_cluster_tile.mlir @@ -16,20 +16,20 @@ llvm.func @tma_load_1d_all_tile(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i16 %4, i64 %5, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -50,20 +50,20 @@ llvm.func @tma_load_2d_all_tile(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.2d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i16 %5, i64 %6, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -84,20 +84,20 @@ llvm.func @tma_load_3d_all_tile(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.3d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i16 %6, i64 %7, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -118,20 +118,20 @@ llvm.func @tma_load_4d_all_tile(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.4d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i16 %7, i64 %8, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -157,15 +157,15 @@ llvm.func @tma_load_5d_all(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: !llvm.ptr nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] l2_cache_hint = %cache : !llvm.ptr<7>, !llvm.ptr nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] multicast_mask = %ctamask l2_cache_hint = %cache : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] l2_cache_hint = %cache {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] multicast_mask = %ctamask l2_cache_hint = %cache {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] l2_cache_hint = %cache <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] multicast_mask = %ctamask l2_cache_hint = %cache <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] l2_cache_hint = %cache {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] multicast_mask = %ctamask l2_cache_hint = %cache {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] l2_cache_hint = %cache <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] multicast_mask = %ctamask l2_cache_hint = %cache <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -185,20 +185,20 @@ llvm.func @tma_load_2d_tile_gather4(%tma: !llvm.ptr, %dest: !llvm.ptr<7>, %bar: // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.gather4.2d(ptr addrspace(7) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i16 %8, i64 %9, i1 true, i1 true, i32 2) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr - - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr + + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_load_cta_im2col.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_load_cta_im2col.mlir index 0ebae19a682be..39fc1ab2dcf0d 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_load_cta_im2col.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_load_cta_im2col.mlir @@ -6,8 +6,8 @@ llvm.func @tma_load_3d_im2col(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llvm. // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.im2col.3d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i16 %6, i64 %7, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%off0] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -18,8 +18,8 @@ llvm.func @tma_load_4d_im2col(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llvm. // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.im2col.4d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i16 %7, i16 %8, i64 0, i1 false) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%off0, %off1] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -30,8 +30,8 @@ llvm.func @tma_load_5d_im2col(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llvm. // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.im2col.5d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i16 %8, i16 %9, i16 %10, i64 0, i1 false) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%off0, %off1, %off2] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -42,8 +42,8 @@ llvm.func @tma_load_3d_im2col_w(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.im2col.w.3d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i16 %6, i16 %7, i64 %8, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -54,8 +54,8 @@ llvm.func @tma_load_4d_im2col_w(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.im2col.w.4d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i16 %7, i16 %8, i64 %9, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -66,8 +66,8 @@ llvm.func @tma_load_5d_im2col_w(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.im2col.w.5d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i16 %8, i16 %9, i64 %10, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -78,8 +78,8 @@ llvm.func @tma_load_3d_im2col_w_128(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.im2col.w.128.3d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i16 %6, i16 %7, i64 %8, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -90,8 +90,8 @@ llvm.func @tma_load_4d_im2col_w_128(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.im2col.w.128.4d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i16 %7, i16 %8, i64 %9, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -102,8 +102,8 @@ llvm.func @tma_load_5d_im2col_w_128(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.im2col.w.128.5d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i16 %8, i16 %9, i64 %10, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] im2col[%wHalo, %wOffset] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } \ No newline at end of file diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_load_cta_tile.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_load_cta_tile.mlir index f11de711ca50a..f15c38eb821a8 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_load_cta_tile.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_load_cta_tile.mlir @@ -6,8 +6,8 @@ llvm.func @tma_load_1d_all_tile(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.tile.1d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i64 %4, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -18,8 +18,8 @@ llvm.func @tma_load_2d_all_tile(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.tile.2d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i64 %5, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -30,8 +30,8 @@ llvm.func @tma_load_3d_all_tile(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.tile.3d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i64 %6, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -42,8 +42,8 @@ llvm.func @tma_load_4d_all_tile(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llv // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.tile.4d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i64 %7, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -54,8 +54,8 @@ llvm.func @tma_load_5d_all(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: !llvm.ptr // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.tile.5d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i64 %8, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] {isCTAOnly = true} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] l2_cache_hint = %cacheHint {isCTAOnly = true} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] <{isCTAOnly = true}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%crd0, %crd1, %crd2, %crd3, %crd4] l2_cache_hint = %cacheHint <{isCTAOnly = true}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -66,8 +66,8 @@ llvm.func @tma_load_2d_tile_gather4(%tma: !llvm.ptr, %dest: !llvm.ptr<3>, %bar: // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.g2s.cta.tile.gather4.2d(ptr addrspace(3) %1, ptr addrspace(3) %2, ptr %0, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i64 %8, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] l2_cache_hint = %cacheHint {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma, %bar, box[%row0, %col0, %col1, %col2, %col3] l2_cache_hint = %cacheHint <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } \ No newline at end of file diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_load_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_load_invalid.mlir index d94ea41f6bb38..1279b76293a49 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_load_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_load_invalid.mlir @@ -4,7 +4,7 @@ llvm.func @tma_load_1d_im2col(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<7>, %bar: !llvm.ptr<3>, %crd0: i32, %ch : i64) { // expected-error @below {{to use im2col mode, the tensor has to be at least 3-dimensional}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0] {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -22,7 +22,7 @@ llvm.func @tma_load_0d(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<7>, %bar: !llvm.p llvm.func @tma_load_gather(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<7>, %bar: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %ch : i64) { // expected-error @below {{Gather4 mode expects 5 coordinates}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0,%crd1,%crd2,%crd3] l2_cache_hint=%ch {mode = #nvvm.tma_load_mode}: !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0,%crd1,%crd2,%crd3] l2_cache_hint=%ch <{mode = #nvvm.tma_load_mode}>: !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -31,7 +31,7 @@ llvm.func @tma_load_gather(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<7>, %bar: !ll llvm.func @tma_load_asm_im2col(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<7>, %bar: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %wHalo: i16, %wOffset: i16, %p : i1) { // expected-error @below {{Predicate is supported only for Tile and Im2col modes.}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] predicate=%p {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] predicate=%p <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -39,7 +39,7 @@ llvm.func @tma_load_asm_im2col(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<7>, %bar: llvm.func @tma_load_cta_asm_im2col(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, %bar: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %wHalo: i16, %wOffset: i16, %p : i1) { // expected-error @below {{Predicate is supported only for shared::cluster mode.}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] predicate=%p {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1, %crd2] im2col[%wHalo, %wOffset] predicate=%p <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -48,7 +48,7 @@ llvm.func @tma_load_cta_asm_im2col(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, % llvm.func @tma_load_cta_0d(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, %bar : !llvm.ptr<3>) { // expected-error @below {{expects coordinates between 1 to 5 dimension}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[] {isCTAOnly = true} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[] <{isCTAOnly = true}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -57,7 +57,7 @@ llvm.func @tma_load_cta_0d(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, %bar : !l llvm.func @tma_load_cta_mc(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, %bar : !llvm.ptr<3>, %crd0: i32, %ctamask : i16) { // expected-error @below {{Multicast is not supported with shared::cta mode.}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0] multicast_mask = %ctamask {isCTAOnly = true, mode = #nvvm.tma_load_mode} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0] multicast_mask = %ctamask <{isCTAOnly = true, mode = #nvvm.tma_load_mode}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -65,7 +65,7 @@ llvm.func @tma_load_cta_mc(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, %bar : !l llvm.func @tma_load_cta_cg(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, %bar : !llvm.ptr<3>, %crd0: i32, %crd1: i32) { // expected-error @below {{CTAGroup is not supported with shared::cta mode.}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1] {isCTAOnly = true, mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1] <{isCTAOnly = true, mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -74,7 +74,7 @@ llvm.func @tma_load_cta_cg(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, %bar : !l llvm.func @tma_load_cta_with_7(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<7>, %bar : !llvm.ptr<3>, %crd0: i32, %crd1: i32) { // expected-error @below {{Shared::cta destination requires address-space 3.}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1] {isCTAOnly = true, mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1] <{isCTAOnly = true, mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<7>, !llvm.ptr llvm.return } @@ -83,7 +83,7 @@ llvm.func @tma_load_cta_with_7(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<7>, %bar llvm.func @tma_load_cluster_with_3(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, %bar : !llvm.ptr<3>, %crd0: i32, %crd1: i32) { // expected-error @below {{Shared::cluster destination requires address-space 7.}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1] {isCTAOnly = false, mode = #nvvm.tma_load_mode, group = #nvvm.cta_group} : !llvm.ptr<3>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tma_desc, %bar, box[%crd0, %crd1] <{isCTAOnly = false, mode = #nvvm.tma_load_mode, group = #nvvm.cta_group}> : !llvm.ptr<3>, !llvm.ptr llvm.return } @@ -92,7 +92,7 @@ llvm.func @tma_load_cluster_with_3(%tma_desc: !llvm.ptr, %dest : !llvm.ptr<3>, % llvm.func @tma_load_im2col_off(%tmaDescriptor: !llvm.ptr, %dest : !llvm.ptr<7>, %barrier: !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %off0: i16, %off1: i16, %ctamask : i16, %cacheHint : i64) { // expected-error @below {{im2col offsets expected 2 (provided 1)}} - nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2,%crd3] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint {mode = #nvvm.tma_load_mode} : !llvm.ptr<7>, !llvm.ptr + nvvm.cp.async.bulk.tensor.shared.cluster.global %dest, %tmaDescriptor, %barrier, box[%crd0,%crd1,%crd2,%crd3] im2col[%off0] multicast_mask = %ctamask l2_cache_hint = %cacheHint <{mode = #nvvm.tma_load_mode}> : !llvm.ptr<7>, !llvm.ptr llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_prefetch.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_prefetch.mlir index 536b52b034db8..3bb4c31b95fa1 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_prefetch.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_prefetch.mlir @@ -28,7 +28,7 @@ llvm.func @tma_prefetch_2d(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %ch : i6 // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.2d(ptr %0, i32 %1, i32 %2, i64 %3, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1] {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1] l2_cache_hint = %ch : !llvm.ptr llvm.return } @@ -48,14 +48,14 @@ llvm.func @tma_prefetch_3d(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i3 nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] : !llvm.ptr nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] l2_cache_hint = %ch : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0, %off1] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0, %off1] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0, %off1] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0, %off1] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } @@ -74,14 +74,14 @@ llvm.func @tma_prefetch_4d(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i3 nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] : !llvm.ptr nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0, %off1] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } @@ -100,14 +100,14 @@ llvm.func @tma_prefetch_5d(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i3 nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] : !llvm.ptr nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1, %off2] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1, %off2] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1, %off2] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1, %off2] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } @@ -117,7 +117,7 @@ llvm.func @tma_prefetch_gather4_2d(%tma_desc : !llvm.ptr, %x0 : i32, %y1 : i32, // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.gather4.2d(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i64 %6, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%x0, %y1, %y2, %y3, %y4] {mode = #nvvm.tma_load_mode} : !llvm.ptr - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%x0, %y1, %y2, %y3, %y4] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%x0, %y1, %y2, %y3, %y4] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%x0, %y1, %y2, %y3, %y4] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_prefetch_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_prefetch_invalid.mlir index 23e47bd594b78..49ba36300ee7e 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_prefetch_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_prefetch_invalid.mlir @@ -10,7 +10,7 @@ llvm.func @tma_prefetch_0d(%tma_desc : !llvm.ptr, %d0 : i32, %ch : i64) { llvm.func @tma_prefetch_2d_im2col(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %off0 : i16, %ch : i64) { // expected-error @below {{to use im2col mode, the tensor has to be at least 3-dimensional}} - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1] im2col[%off0] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1] im2col[%off0] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } @@ -18,7 +18,7 @@ llvm.func @tma_prefetch_2d_im2col(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, % llvm.func @tma_prefetch_5d_im2col(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %d4 : i32, %off0 : i16, %off1 : i16, %off2 : i16, %ch : i64) { // expected-error @below {{im2col offsets expected 3 (provided 2)}} - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } @@ -26,7 +26,7 @@ llvm.func @tma_prefetch_5d_im2col(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, % llvm.func @tma_prefetch_3d_im2col_w(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %off0 : i16) { // expected-error @below {{im2col offsets expected 2 (provided 1)}} - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0] {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2] im2col[%off0] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } @@ -34,7 +34,7 @@ llvm.func @tma_prefetch_3d_im2col_w(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, llvm.func @tma_prefetch_4d_im2col_w_128(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %off0 : i16) { // expected-error @below {{im2col offsets expected 2 (provided 1)}} - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0] {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3] im2col[%off0] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } @@ -42,7 +42,7 @@ llvm.func @tma_prefetch_4d_im2col_w_128(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : llvm.func @tma_prefetch_gather4_3d(%tma_desc : !llvm.ptr, %d0 : i32) { // expected-error @below {{Gather4 mode expects 5 coordinates}} - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0] {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0] <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } @@ -50,7 +50,7 @@ llvm.func @tma_prefetch_gather4_3d(%tma_desc : !llvm.ptr, %d0 : i32) { llvm.func @tma_prefetch_gather4_2d(%tma_desc : !llvm.ptr, %x0 : i32, %y1 : i32, %y2 : i32, %y3 : i32, %y4 : i32, %off0 : i16, %ch : i64) { // expected-error @below {{im2col offsets expected 0 (provided 1)}} - nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%x0, %y1, %y2, %y3, %y4] im2col[%off0] l2_cache_hint = %ch {mode = #nvvm.tma_load_mode} : !llvm.ptr + nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%x0, %y1, %y2, %y3, %y4] im2col[%off0] l2_cache_hint = %ch <{mode = #nvvm.tma_load_mode}> : !llvm.ptr llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_store.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_store.mlir index b77927fcfb47b..a8cd9d40a3bce 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_store.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_store.mlir @@ -38,9 +38,9 @@ llvm.func @tma_store_3d(%tma_desc: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, % nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2] l2_cache_hint=%ch : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2] {mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2] <{mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2] l2_cache_hint=%ch {mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2] l2_cache_hint=%ch <{mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -56,9 +56,9 @@ llvm.func @tma_store_4d(%tma_desc: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, % nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3] l2_cache_hint=%ch : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3] {mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3] <{mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3] l2_cache_hint=%ch {mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3] l2_cache_hint=%ch <{mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -74,9 +74,9 @@ llvm.func @tma_store_5d(%tma_desc: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, % nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] l2_cache_hint=%ch : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] {mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] <{mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] l2_cache_hint=%ch {mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] l2_cache_hint=%ch <{mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -86,9 +86,9 @@ llvm.func @tma_store_scatter(%tma_desc: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i // CHECK-NEXT: call void @llvm.nvvm.cp.async.bulk.tensor.s2g.tile.scatter4.2d(ptr addrspace(3) %1, ptr %0, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i64 %7, i1 true) // CHECK-NEXT: ret void // CHECK-NEXT: } - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] {mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] <{mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] l2_cache_hint=%ch {mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3,%crd4] l2_cache_hint=%ch <{mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_store_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_store_invalid.mlir index 9d9dc8e35b5db..30958d00cfc92 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_store_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_store_invalid.mlir @@ -4,7 +4,7 @@ llvm.func @tma_store_1d_im2col(%tma_desc: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %ch : i64) { // expected-error @below {{to use im2col mode, the tensor has to be at least 3-dimensional}} - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0] {mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0] <{mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -22,7 +22,7 @@ llvm.func @tma_store_0d(%tma_desc: !llvm.ptr, %src : !llvm.ptr<3>) { llvm.func @tma_store_scatter(%tma_desc: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %crd3: i32, %ch : i64) { // expected-error @below {{Scatter4 mode expects 5 coordinates}} - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3] l2_cache_hint=%ch {mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0,%crd1,%crd2,%crd3] l2_cache_hint=%ch <{mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -40,7 +40,7 @@ llvm.func @tma_store_asm_ch(%tma_desc: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i3 llvm.func @tma_store_asm_im2col(%tma_desc: !llvm.ptr, %src : !llvm.ptr<3>, %crd0: i32, %crd1: i32, %crd2: i32, %ch : i64, %p : i1) { // expected-error @below {{Inline-ptx lowering supported only for Tile mode.}} - nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0, %crd1, %crd2], predicate=%p {mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.global.shared.cta %tma_desc, %src, box[%crd0, %crd1, %crd2], predicate=%p <{mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir index 2231f1dabd504..e387363c3ce3a 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir @@ -10,14 +10,14 @@ llvm.func @tma_store_reduce_1d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 0, i1 false) @@ -27,14 +27,14 @@ llvm.func @tma_store_reduce_1d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 0, i1 false) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -50,14 +50,14 @@ llvm.func @tma_store_reduce_2d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 0, i1 false) @@ -67,14 +67,14 @@ llvm.func @tma_store_reduce_2d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 0, i1 false) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -90,14 +90,14 @@ llvm.func @tma_store_reduce_3d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) @@ -107,14 +107,14 @@ llvm.func @tma_store_reduce_3d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -128,14 +128,14 @@ llvm.func @tma_store_reduce_3d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) @@ -145,14 +145,14 @@ llvm.func @tma_store_reduce_3d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 0, i1 false) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -168,14 +168,14 @@ llvm.func @tma_store_reduce_4d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) @@ -185,14 +185,14 @@ llvm.func @tma_store_reduce_4d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -206,14 +206,14 @@ llvm.func @tma_store_reduce_4d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) @@ -223,14 +223,14 @@ llvm.func @tma_store_reduce_4d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 0, i1 false) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -246,14 +246,14 @@ llvm.func @tma_store_reduce_5d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) @@ -263,14 +263,14 @@ llvm.func @tma_store_reduce_5d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind}> : !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -284,14 +284,14 @@ llvm.func @tma_store_reduce_5d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) @@ -301,13 +301,13 @@ llvm.func @tma_store_reduce_5d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 0, i1 false) - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce_invalid.mlir index 2fcf00fa3b670..704b1187c1417 100644 --- a/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce_invalid.mlir @@ -4,7 +4,7 @@ llvm.func @tma_reduce_0d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %ch : i64) { // expected-error @below {{expects coordinates between 1 to 5 dimension}} - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[] {redKind = #nvvm.tma_redux_kind}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[] <{redKind = #nvvm.tma_redux_kind}>: !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -12,7 +12,7 @@ llvm.func @tma_reduce_0d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %ch : i64) llvm.func @tma_reduce_2d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %ch : i64) { // expected-error @below {{to use im2col mode, the tensor has to be at least 3-dimensional}} - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}>: !llvm.ptr, !llvm.ptr<3> llvm.return } @@ -20,6 +20,6 @@ llvm.func @tma_reduce_2d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 llvm.func @tma_store_reduce_scatter(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %ch : i64) { // expected-error @below {{Scatter mode unsupported for CpAsyncBulkTensorReduceOp}} - nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] <{redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}> : !llvm.ptr, !llvm.ptr<3> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvm/transcendentals.mlir b/mlir/test/Target/LLVMIR/nvvm/transcendentals.mlir index 5b7ecc9e1c87c..fb698522dbc6b 100644 --- a/mlir/test/Target/LLVMIR/nvvm/transcendentals.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/transcendentals.mlir @@ -10,7 +10,7 @@ llvm.func @nvvm_sin(%arg0: f32) -> f32 { // CHECK-LABEL: @nvvm_sin_ftz llvm.func @nvvm_sin_ftz(%arg0: f32) -> f32 { // CHECK: call float @llvm.nvvm.sin.approx.ftz.f(float %{{.*}}) - %0 = nvvm.sin %arg0 {ftz = true} : f32 + %0 = nvvm.sin %arg0, ftz = true : f32 llvm.return %0 : f32 } @@ -24,7 +24,7 @@ llvm.func @nvvm_cos(%arg0: f32) -> f32 { // CHECK-LABEL: @nvvm_cos_ftz llvm.func @nvvm_cos_ftz(%arg0: f32) -> f32 { // CHECK: call float @llvm.nvvm.cos.approx.ftz.f(float %{{.*}}) - %0 = nvvm.cos %arg0 {ftz = true} : f32 + %0 = nvvm.cos %arg0, ftz = true : f32 llvm.return %0 : f32 } @@ -38,7 +38,7 @@ llvm.func @nvvm_lg2(%arg0: f32) -> f32 { // CHECK-LABEL: @nvvm_lg2_ftz llvm.func @nvvm_lg2_ftz(%arg0: f32) -> f32 { // CHECK: call float @llvm.nvvm.lg2.approx.ftz.f(float %{{.*}}) - %0 = nvvm.log2 %arg0 {ftz = true} : f32 + %0 = nvvm.log2 %arg0, ftz = true : f32 llvm.return %0 : f32 } @@ -52,6 +52,6 @@ llvm.func @nvvm_ex2(%arg0: f32) -> f32 { // CHECK-LABEL: @nvvm_ex2_ftz llvm.func @nvvm_ex2_ftz(%arg0: f32) -> f32 { // CHECK: call float @llvm.nvvm.ex2.approx.ftz.f32(float %{{.*}}) - %0 = nvvm.ex2 %arg0 {ftz = true} : f32 + %0 = nvvm.ex2 %arg0, ftz = true : f32 llvm.return %0 : f32 } diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir index 6e96e918d5f0d..6dbe0d030977a 100644 --- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir @@ -85,7 +85,7 @@ llvm.func @nvvm_fence_proxy_release() { llvm.func @convert_float_to_tf32_rna_relu(%src : f32) -> i32 { // expected-error @below {{Relu not supported with rna rounding mode.}} - %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, relu=true} + %res = nvvm.convert.float.to.tf32 %src <{rnd = #nvvm.fp_rnd_mode, relu=true}> llvm.return %res : i32 } @@ -109,7 +109,7 @@ llvm.func @nvvm_st_bulk_initval_nonzero(%addr : !llvm.ptr, %size : i64) { llvm.func @nvvm_tcgen05_cp_128x256b_mc(%taddr : !llvm.ptr<6>, %smem_desc : i64) { // expected-error @below {{Invalid multicast type for tcgen05.cp Op}} - nvvm.tcgen05.cp %taddr, %smem_desc {shape = #nvvm.tcgen05_cp_shape, multicast = #nvvm.tcgen05_cp_multicast} + nvvm.tcgen05.cp %taddr, %smem_desc <{shape = #nvvm.tcgen05_cp_shape, multicast = #nvvm.tcgen05_cp_multicast}> llvm.return } @@ -117,10 +117,10 @@ llvm.func @nvvm_tcgen05_cp_128x256b_mc(%taddr : !llvm.ptr<6>, %smem_desc : i64) llvm.func @nvvm_tcgen05_cp_32x128b_wx2(%taddr : !llvm.ptr<6>, %smem_desc : i64) { // expected-error @below {{Shape 32x128b requires multicast warpx4 for tcgen05.cp Op}} - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, multicast = #nvvm.tcgen05_cp_multicast - } + }> llvm.return } @@ -128,10 +128,10 @@ llvm.func @nvvm_tcgen05_cp_32x128b_wx2(%taddr : !llvm.ptr<6>, %smem_desc : i64) llvm.func @nvvm_tcgen05_cp_64x128b(%taddr : !llvm.ptr<6>, %smem_desc : i64) { // expected-error @below {{Shape 64x128b requires multicast warpx2_01_23 or warpx2_02_13 for tcgen05.cp Op}} - nvvm.tcgen05.cp %taddr, %smem_desc { + nvvm.tcgen05.cp %taddr, %smem_desc <{ shape = #nvvm.tcgen05_cp_shape, multicast = #nvvm.tcgen05_cp_multicast - } + }> llvm.return } @@ -155,7 +155,7 @@ llvm.func @nvvm_match_sync_any(%val32: i32, %thread_mask: i32) { llvm.func @nvvm_cvt_float_to_f8x2_invalid_rounding_e4m3(%a : f32, %b : f32) { // expected-error @below {{Only RN rounding mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}} - %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E4M3FN) + %res = nvvm.convert.f32x2.to.f8x2 %a, %b <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E4M3FN) llvm.return } @@ -163,7 +163,7 @@ llvm.func @nvvm_cvt_float_to_f8x2_invalid_rounding_e4m3(%a : f32, %b : f32) { llvm.func @nvvm_cvt_float_to_f8x2_invalid_rounding_e5m2(%a : f32, %b : f32) { // expected-error @below {{Only RN rounding mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}} - %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E5M2) + %res = nvvm.convert.f32x2.to.f8x2 %a, %b <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E5M2) llvm.return } @@ -171,7 +171,7 @@ llvm.func @nvvm_cvt_float_to_f8x2_invalid_rounding_e5m2(%a : f32, %b : f32) { llvm.func @nvvm_cvt_float_to_f8x2_invalid_rounding_ue8m0(%a : f32, %b : f32) { // expected-error @below {{Only RZ and RP rounding modes are supported for conversions from f32x2 to 'f8E8M0FNU' type}} - %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode} : i16 (f8E8M0FNU) + %res = nvvm.convert.f32x2.to.f8x2 %a, %b <{rnd = #nvvm.fp_rnd_mode}> : i16 (f8E8M0FNU) llvm.return } @@ -179,7 +179,7 @@ llvm.func @nvvm_cvt_float_to_f8x2_invalid_rounding_ue8m0(%a : f32, %b : f32) { llvm.func @nvvm_cvt_float_to_f8x2_invalid_saturation_e4m3(%a : f32, %b : f32) { // expected-error @below {{Only SATFINITE saturation mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}} - %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E4M3FN) + %res = nvvm.convert.f32x2.to.f8x2 %a, %b <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E4M3FN) llvm.return } @@ -187,7 +187,7 @@ llvm.func @nvvm_cvt_float_to_f8x2_invalid_saturation_e4m3(%a : f32, %b : f32) { llvm.func @nvvm_cvt_float_to_f8x2_invalid_saturation_e5m2(%a : f32, %b : f32) { // expected-error @below {{Only SATFINITE saturation mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}} - %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : i16 (f8E5M2) + %res = nvvm.convert.f32x2.to.f8x2 %a, %b <{rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode}> : i16 (f8E5M2) llvm.return } @@ -195,7 +195,7 @@ llvm.func @nvvm_cvt_float_to_f8x2_invalid_saturation_e5m2(%a : f32, %b : f32) { llvm.func @nvvm_cvt_float_to_f8x2_relu_not_supported_ue8m0(%a : f32, %b : f32) { // expected-error @below {{relu not supported for conversions to 'f8E8M0FNU' type}} - %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode, relu = true} : i16 (f8E8M0FNU) + %res = nvvm.convert.f32x2.to.f8x2 %a, %b <{rnd = #nvvm.fp_rnd_mode, relu = true}> : i16 (f8E8M0FNU) llvm.return } @@ -379,7 +379,7 @@ llvm.func @nvvm_prefetch_no_level_or_tensormap(%gen_ptr: !llvm.ptr) { llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected num attribute to be 1, 2 or 4}} - nvvm.stmatrix %arg0, %r1, %r2, %r3 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32, i32, i32 + nvvm.stmatrix %arg0, %r1, %r2, %r3 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32, i32, i32 llvm.return } @@ -387,7 +387,7 @@ llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32 llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected shape to be 8x8 or 16x8}} - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 llvm.return } @@ -395,14 +395,14 @@ llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32 llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected element type to be B16 for 8x8 matrix}} - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 llvm.return } // ----- llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected element type to be B8 for 16x8 matrix}} - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 llvm.return } @@ -410,7 +410,7 @@ llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32 llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected layout to be col for 16x8 matrix}} - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 llvm.return } @@ -418,7 +418,7 @@ llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32 llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected num attribute to be 1, 2 or 4}} - nvvm.stmatrix %arg0, %r1, %r2, %r3 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32, i32, i32 + nvvm.stmatrix %arg0, %r1, %r2, %r3 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32, i32, i32 llvm.return } @@ -426,7 +426,7 @@ llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32 llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected shape to be 8x8 or 16x8}} - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 llvm.return } @@ -434,14 +434,14 @@ llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32 llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected element type to be B16 for 8x8 matrix}} - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 llvm.return } // ----- llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected element type to be B8 for 16x8 matrix}} - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 llvm.return } @@ -449,7 +449,7 @@ llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32 llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // expected-error@+1 {{'nvvm.stmatrix' op expected layout to be col for 16x8 matrix}} - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 llvm.return } @@ -457,7 +457,7 @@ llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32 llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected num attribute to be 1, 2 or 4 for 8x8 matrix}} - %l = nvvm.ldmatrix %arg0 {num = 3 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %l = nvvm.ldmatrix %arg0 <{num = 3 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 llvm.return } @@ -465,7 +465,7 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is i32}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32)> + %l = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32)> llvm.return } @@ -473,7 +473,7 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is a structure of 4 elements of type i32}} - %l = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %l = nvvm.ldmatrix %arg0 <{num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> llvm.return } @@ -481,7 +481,7 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected element type to be b16 for 8x8 matrix}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %l = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 llvm.return } @@ -489,7 +489,7 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected num attribute to be 1, 2 or 4 for 8x16 matrix}} - %l = nvvm.ldmatrix %arg0 {num = 3 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32)> + %l = nvvm.ldmatrix %arg0 <{num = 3 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32)> llvm.return } @@ -497,7 +497,7 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected layout to be row for 8x16 matrix}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %l = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 llvm.return } @@ -505,7 +505,7 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected element type to be b8x16.b4x16_p64 or b8x16.b6x16_p32 for 8x16 matrix}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %l = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 llvm.return } @@ -513,7 +513,7 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected num attribute to be 1 or 2 for 16x16 matrix}} - %l = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %l = nvvm.ldmatrix %arg0 <{num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return } @@ -521,7 +521,7 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected layout to be col for 16x16 matrix}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %l = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return } @@ -529,13 +529,13 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected element type to be b8, b8x16.b4x16_p64 or b8x16.b6x16_p32 for 16x16 matrix}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %l = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 llvm.return } llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is a structure of 2 elements of type i32}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %l = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 llvm.return } @@ -543,8 +543,8 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.func @mov_matrix(%src : i32) -> i32 { // expected-error@+1 {{'nvvm.movmatrix' op expected shape to be 8x8}} - %dst = nvvm.movmatrix %src {shape = #nvvm.ld_st_matrix_shape, - eltType = #nvvm.ld_st_matrix_elt_type} : i32 + %dst = nvvm.movmatrix %src <{shape = #nvvm.ld_st_matrix_shape, + eltType = #nvvm.ld_st_matrix_elt_type}> : i32 llvm.return %dst : i32 } @@ -552,9 +552,9 @@ llvm.func @mov_matrix(%src : i32) -> i32 { llvm.func @mov_matrix(%src : i32) -> i32 { // expected-error@+1 {{'nvvm.movmatrix' op expected layout to be col}} - %dst = nvvm.movmatrix %src {shape = #nvvm.ld_st_matrix_shape, + %dst = nvvm.movmatrix %src <{shape = #nvvm.ld_st_matrix_shape, layout = #nvvm.mma_layout, - eltType = #nvvm.ld_st_matrix_elt_type} : i32 + eltType = #nvvm.ld_st_matrix_elt_type}> : i32 llvm.return %dst : i32 } @@ -562,8 +562,8 @@ llvm.func @mov_matrix(%src : i32) -> i32 { llvm.func @mov_matrix(%src : i32) -> i32 { // expected-error@+1 {{'nvvm.movmatrix' op expected element type to be b16}} - %dst = nvvm.movmatrix %src {shape = #nvvm.ld_st_matrix_shape, - eltType = #nvvm.ld_st_matrix_elt_type} : i32 + %dst = nvvm.movmatrix %src <{shape = #nvvm.ld_st_matrix_shape, + eltType = #nvvm.ld_st_matrix_elt_type}> : i32 llvm.return %dst : i32 } @@ -611,7 +611,7 @@ func.func @invalid_range_equal_bounds() { llvm.func @nvvm_wmma_load_a_f64(%arg0: !llvm.ptr, %arg1 : i32) { // expected-error @below {{'nvvm.wmma.load' op expected destination type to be f64}} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32}> : (!llvm.ptr) -> !llvm.struct<(f64)> llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index f2888025d8a08..481726eca64dc 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -177,7 +177,7 @@ llvm.func @llvm_nvvm_cluster_arrive() { // CHECK: call void @llvm.nvvm.barrier.cluster.arrive() nvvm.cluster.arrive // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.aligned() - nvvm.cluster.arrive {aligned} + nvvm.cluster.arrive aligned llvm.return } @@ -186,7 +186,7 @@ llvm.func @llvm_nvvm_cluster_arrive_relaxed() { // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed() nvvm.cluster.arrive.relaxed // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned() - nvvm.cluster.arrive.relaxed {aligned} + nvvm.cluster.arrive.relaxed aligned llvm.return } @@ -195,7 +195,7 @@ llvm.func @llvm_nvvm_cluster_wait() { // CHECK: call void @llvm.nvvm.barrier.cluster.wait() nvvm.cluster.wait // CHECK: call void @llvm.nvvm.barrier.cluster.wait.aligned() - nvvm.cluster.wait {aligned} + nvvm.cluster.wait aligned llvm.return } @@ -227,21 +227,21 @@ llvm.func @nvvm_shfl_pred( %0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : f32) -> !llvm.struct<(i32, i1)> { // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %6 = nvvm.shfl.sync bfly %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> + %6 = nvvm.shfl.sync bfly %0, %3, %1, %2 <{return_value_and_is_valid}> : i32 -> !llvm.struct<(i32, i1)> // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %7 = nvvm.shfl.sync bfly %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + %7 = nvvm.shfl.sync bfly %0, %4, %1, %2 <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.up.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %8 = nvvm.shfl.sync up %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> + %8 = nvvm.shfl.sync up %0, %3, %1, %2 <{return_value_and_is_valid}> : i32 -> !llvm.struct<(i32, i1)> // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.up.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %9 = nvvm.shfl.sync up %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + %9 = nvvm.shfl.sync up %0, %4, %1, %2 <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.down.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %10 = nvvm.shfl.sync down %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> + %10 = nvvm.shfl.sync down %0, %3, %1, %2 <{return_value_and_is_valid}> : i32 -> !llvm.struct<(i32, i1)> // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.down.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %11 = nvvm.shfl.sync down %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + %11 = nvvm.shfl.sync down %0, %4, %1, %2 <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.idx.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %12 = nvvm.shfl.sync idx %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> + %12 = nvvm.shfl.sync idx %0, %3, %1, %2 <{return_value_and_is_valid}> : i32 -> !llvm.struct<(i32, i1)> // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.idx.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - %13 = nvvm.shfl.sync idx %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + %13 = nvvm.shfl.sync idx %0, %4, %1, %2 <{return_value_and_is_valid}> : f32 -> !llvm.struct<(f32, i1)> llvm.return %6 : !llvm.struct<(i32, i1)> } @@ -386,7 +386,7 @@ llvm.func @nvvm_mma_m16n8k4_tf32_f32(%a0 : i32, %a1 : i32, llvm.func @gpu_wmma_load_op(%arg0: !llvm.ptr<3>, %arg1: i32) { // CHECK: call { <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}) %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return @@ -400,7 +400,7 @@ llvm.func @gpu_wmma_store_op(%arg0: !llvm.ptr<3>, %arg1: i32, %arg4: vector<2 xf16>, %arg5: vector<2 x f16>) { // CHECK: call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16.p3(ptr addrspace(3) %{{.*}}, <2 x half> {{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, i32 %{{.*}}) nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 - {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : !llvm.ptr<3>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> llvm.return } @@ -420,7 +420,7 @@ llvm.func @gpu_wmma_mma_op(%arg0: vector<2 x f16>, %arg1: vector<2 x f16>, %arg18: vector<2 x f16>, %arg19: vector<2 x f16>) { // CHECK: call { <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16(<2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}) %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19 - {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, @@ -433,7 +433,7 @@ llvm.func @gpu_wmma_mma_op(%arg0: vector<2 x f16>, %arg1: vector<2 x f16>, llvm.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) { // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0(ptr %{{.*}}, i32 %{{.*}}) %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return } @@ -444,7 +444,7 @@ llvm.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) { // CHECK: { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) %r = nvvm.wmma.mma %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 - {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 8 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 8 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32}> : (i32, i32, i32, i32, i32, i32, i32, i32, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return @@ -454,7 +454,7 @@ llvm.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : llvm.func @nvvm_wmma_load_a_f64(%arg0: !llvm.ptr, %arg1 : i32) { // CHECK: call double @llvm.nvvm.wmma.m8n8k4.load.a.row.stride.f64.p0(ptr %{{.*}}, i32 %{{.*}}) %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32}> : (!llvm.ptr) -> f64 llvm.return } @@ -463,7 +463,7 @@ llvm.func @nvvm_wmma_load_a_f64(%arg0: !llvm.ptr, %arg1 : i32) { llvm.func @nvvm_wmma_load_c_f64(%arg0: !llvm.ptr, %arg1 : i32) { // CHECK: call { double, double } @llvm.nvvm.wmma.m8n8k4.load.c.row.stride.f64.p0(ptr %{{.*}}, i32 %{{.*}}) %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32} + <{eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32}> : (!llvm.ptr) -> !llvm.struct<(f64, f64)> llvm.return } @@ -472,7 +472,7 @@ llvm.func @nvvm_wmma_load_c_f64(%arg0: !llvm.ptr, %arg1 : i32) { llvm.func @nvvm_wmma_mma_f64(%0 : f64, %1 : f64, %2 : f64, %3 : f64) { // CHECK: { double, double } @llvm.nvvm.wmma.m8n8k4.mma.row.col.f64(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}) %r = nvvm.wmma.mma %0, %1, %2, %3 - {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 4 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32} + <{eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 4 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32}> : (f64, f64, f64, f64) -> !llvm.struct<(f64, f64)> llvm.return @@ -482,7 +482,7 @@ llvm.func @nvvm_wmma_mma_f64(%0 : f64, %1 : f64, %2 : f64, %3 : f64) { llvm.func @nvvm_wmma_store_d_f64(%arg0: !llvm.ptr, %arg1 : i32, %arg2 : f64, %arg3 : f64) { // CHECK: call void @llvm.nvvm.wmma.m8n8k4.store.d.row.stride.f64.p0(ptr %{{.*}}, double %{{.*}}, double %{{.*}}, i32 %{{.*}}) nvvm.wmma.store %arg0, %arg1, %arg2, %arg3 - {eltype = #nvvm.mma_type, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32} + <{eltype = #nvvm.mma_type, k = 4 : i32, layout = #nvvm.mma_layout, m = 8 : i32, n = 8 : i32}> : !llvm.ptr, f64, f64 llvm.return } @@ -541,87 +541,87 @@ llvm.func @llvm_nvvm_cp_async_bulk_wait_group() { // CHECK: call void @llvm.nvvm.cp.async.bulk.wait.group(i32 3) nvvm.cp.async.bulk.wait_group 3 // CHECK: call void @llvm.nvvm.cp.async.bulk.wait.group.read(i32 0) - nvvm.cp.async.bulk.wait_group 0 {read} + nvvm.cp.async.bulk.wait_group 0 <{read}> // CHECK: call void @llvm.nvvm.cp.async.bulk.wait.group.read(i32 3) - nvvm.cp.async.bulk.wait_group 3 {read} + nvvm.cp.async.bulk.wait_group 3 <{read}> llvm.return } // CHECK-LABEL: @ld_matrix llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { // CHECK: call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3(ptr addrspace(3) %{{.*}}) - %l1 = nvvm.ldmatrix %arg0 {num = 1: i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %l1 = nvvm.ldmatrix %arg0 <{num = 1: i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 // CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.b16.p3(ptr addrspace(3) %{{.*}}) - %l2 = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %l2 = nvvm.ldmatrix %arg0 <{num = 2 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.b16.p3(ptr addrspace(3) %{{.*}}) - %l4 = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %l4 = nvvm.ldmatrix %arg0 <{num = 4 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> // CHECK: call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.trans.b16.p3(ptr addrspace(3) %{{.*}}) - %l1t = nvvm.ldmatrix %arg0 {num = 1: i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %l1t = nvvm.ldmatrix %arg0 <{num = 1: i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 // CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.trans.b16.p3(ptr addrspace(3) %{{.*}}) - %l2t = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %l2t = nvvm.ldmatrix %arg0 <{num = 2 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.trans.b16.p3(ptr addrspace(3) %{{.*}}) - %l4t = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %l4t = nvvm.ldmatrix %arg0 <{num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> // CHECK: call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n16.x1.b8x16.b6x16_p32.p3(ptr addrspace(3) %{{.*}}) - %m8n16_b6_l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %m8n16_b6_l1 = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 // CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n16.x2.b8x16.b6x16_p32.p3(ptr addrspace(3) %{{.*}}) - %m8n16_b6_l2 = nvvm.ldmatrix %arg0 {num = 2: i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %m8n16_b6_l2 = nvvm.ldmatrix %arg0 <{num = 2: i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n16.x4.b8x16.b6x16_p32.p3(ptr addrspace(3) %{{.*}}) - %m8n16_b6_l4 = nvvm.ldmatrix %arg0{num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape,eltType =#nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %m8n16_b6_l4 = nvvm.ldmatrix %arg0<{num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape,eltType =#nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> // CHECK: call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n16.x1.b8x16.b4x16_p64.p3(ptr addrspace(3) %{{.*}}) - %m8n16_b4_l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> i32 + %m8n16_b4_l1 = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> i32 // CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n16.x2.b8x16.b4x16_p64.p3(ptr addrspace(3) %{{.*}}) - %m8n16_b4_l2 = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %m8n16_b4_l2 = nvvm.ldmatrix %arg0 <{num = 2 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n16.x4.b8x16.b4x16_p64.p3(ptr addrspace(3) %{{.*}}) - %m8n16_b4_l4 = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %m8n16_b4_l4 = nvvm.ldmatrix %arg0 <{num = 4 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> // CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m16n16.x1.trans.b8.p3(ptr addrspace(3) %{{.*}}) - %m16n16_l1t = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %m16n16_l1t = nvvm.ldmatrix %arg0 <{num = 1 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m16n16.x2.trans.b8.p3(ptr addrspace(3) %{{.*}}) - %m16n16_l2t = nvvm.ldmatrix %arg0{num = 2 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape,eltType =#nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %m16n16_l2t = nvvm.ldmatrix %arg0<{num = 2 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape,eltType =#nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> // CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m16n16.x1.trans.b8x16.b6x16_p32.p3(ptr addrspace(3) %{{.*}}) - %m16n16_b6_l1t = nvvm.ldmatrix %arg0 {num = 1: i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %m16n16_b6_l1t = nvvm.ldmatrix %arg0 <{num = 1: i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m16n16.x2.trans.b8x16.b6x16_p32.p3(ptr addrspace(3) %{{.*}}) - %m16n16_b6_l2t = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %m16n16_b6_l2t = nvvm.ldmatrix %arg0 <{num = 2 : i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> // CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m16n16.x1.trans.b8x16.b4x16_p64.p3(ptr addrspace(3) %{{.*}}) - %m16n16_b4_l1t = nvvm.ldmatrix %arg0 {num = 1: i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %m16n16_b4_l1t = nvvm.ldmatrix %arg0 <{num = 1: i32, layout = #nvvm.mma_layout, shape =#nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m16n16.x2.trans.b8x16.b4x16_p64.p3(ptr addrspace(3) %{{.*}}) - %m16n16_b4_l2t = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape,eltType =#nvvm.ld_st_matrix_elt_type} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %m16n16_b4_l2t = nvvm.ldmatrix %arg0 <{num = 2 : i32, layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape,eltType =#nvvm.ld_st_matrix_elt_type}> : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return } // CHECK-LABEL: @st_matrix llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x1.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}) - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x1.trans.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}) - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m16n8.x1.trans.b8.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}) - nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32 + nvvm.stmatrix %arg0, %r1 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32 // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x2.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - nvvm.stmatrix %arg0, %r1, %r2 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32, i32 + nvvm.stmatrix %arg0, %r1, %r2 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32, i32 // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x2.trans.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - nvvm.stmatrix %arg0, %r1, %r2 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32, i32 + nvvm.stmatrix %arg0, %r1, %r2 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32, i32 // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m16n8.x2.trans.b8.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - nvvm.stmatrix %arg0, %r1, %r2 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32, i32 + nvvm.stmatrix %arg0, %r1, %r2 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32, i32 // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x4.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - nvvm.stmatrix %arg0, %r1, %r2, %r3, %r4 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32, i32, i32, i32 + nvvm.stmatrix %arg0, %r1, %r2, %r3, %r4 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32, i32, i32, i32 // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x4.trans.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - nvvm.stmatrix %arg0, %r1, %r2, %r3, %r4 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32, i32, i32, i32 + nvvm.stmatrix %arg0, %r1, %r2, %r3, %r4 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32, i32, i32, i32 // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m16n8.x4.trans.b8.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) - nvvm.stmatrix %arg0, %r1, %r2, %r3, %r4 {layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type} : !llvm.ptr<3>, i32, i32, i32, i32 + nvvm.stmatrix %arg0, %r1, %r2, %r3, %r4 <{layout = #nvvm.mma_layout, shape = #nvvm.ld_st_matrix_shape, eltType = #nvvm.ld_st_matrix_elt_type}> : !llvm.ptr<3>, i32, i32, i32, i32 llvm.return } // CHECK-LABEL: @nvvm_movmatrix llvm.func @nvvm_movmatrix(%src : i32) -> i32 { // CHECK: call i32 @llvm.nvvm.movmatrix.sync.aligned.m8n8.trans.b16(i32 %{{.*}}) - %dst = nvvm.movmatrix %src {shape = #nvvm.ld_st_matrix_shape, - eltType = #nvvm.ld_st_matrix_elt_type} : i32 + %dst = nvvm.movmatrix %src <{shape = #nvvm.ld_st_matrix_shape, + eltType = #nvvm.ld_st_matrix_elt_type}> : i32 llvm.return %dst : i32 } @@ -811,19 +811,19 @@ llvm.func @nvvm_redux_sync_f32(%value: f32, %offset: i32) { // CHECK: call float @llvm.nvvm.redux.sync.fmin(float %{{.*}}, i32 %{{.*}}) %0 = nvvm.redux.sync fmin %value, %offset: f32 -> f32 // CHECK: call float @llvm.nvvm.redux.sync.fmin.abs(float %{{.*}}, i32 %{{.*}}) - %1 = nvvm.redux.sync fmin %value, %offset {abs = true}: f32 -> f32 + %1 = nvvm.redux.sync fmin %value, %offset abs = true : f32 -> f32 // CHECK: call float @llvm.nvvm.redux.sync.fmin.NaN(float %{{.*}}, i32 %{{.*}}) - %2 = nvvm.redux.sync fmin %value, %offset {nan = true}: f32 -> f32 + %2 = nvvm.redux.sync fmin %value, %offset nan = true : f32 -> f32 // CHECK: call float @llvm.nvvm.redux.sync.fmin.abs.NaN(float %{{.*}}, i32 %{{.*}}) - %3 = nvvm.redux.sync fmin %value, %offset {abs = true, nan = true}: f32 -> f32 + %3 = nvvm.redux.sync fmin %value, %offset abs = true nan = true : f32 -> f32 // CHECK: call float @llvm.nvvm.redux.sync.fmax(float %{{.*}}, i32 %{{.*}}) %4 = nvvm.redux.sync fmax %value, %offset: f32 -> f32 // CHECK: call float @llvm.nvvm.redux.sync.fmax.abs(float %{{.*}}, i32 %{{.*}}) - %5 = nvvm.redux.sync fmax %value, %offset {abs = true}: f32 -> f32 + %5 = nvvm.redux.sync fmax %value, %offset abs = true : f32 -> f32 // CHECK: call float @llvm.nvvm.redux.sync.fmax.NaN(float %{{.*}}, i32 %{{.*}}) - %6 = nvvm.redux.sync fmax %value, %offset {nan = true}: f32 -> f32 + %6 = nvvm.redux.sync fmax %value, %offset nan = true : f32 -> f32 // CHECK: call float @llvm.nvvm.redux.sync.fmax.abs.NaN(float %{{.*}}, i32 %{{.*}}) - %7 = nvvm.redux.sync fmax %value, %offset {abs = true, nan = true}: f32 -> f32 + %7 = nvvm.redux.sync fmax %value, %offset abs = true nan = true : f32 -> f32 llvm.return } @@ -883,35 +883,35 @@ llvm.func @nvvm_dot_accumulate_2way(%a: vector<2xi16>, %b: vector<4xi8>, %c: i32 // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32 // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32 // CHECK: call i32 @llvm.nvvm.idp2a.u.u(i32 %[[a_cast]], i32 %[[b_cast]], i1 false, i32 %{{.*}}) - %0 = nvvm.dot.accumulate.2way %a , %b , %c {b_hi = false} : vector<2xi16>, vector<4xi8> + %0 = nvvm.dot.accumulate.2way %a , %b , %c <{b_hi = false}> : vector<2xi16>, vector<4xi8> // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32 // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32 // CHECK: call i32 @llvm.nvvm.idp2a.u.u(i32 %[[a_cast]], i32 %[[b_cast]], i1 true, i32 %{{.*}}) - %1 = nvvm.dot.accumulate.2way %a , %b , %c {b_hi = true}: vector<2xi16>, vector<4xi8> + %1 = nvvm.dot.accumulate.2way %a , %b , %c <{b_hi = true}>: vector<2xi16>, vector<4xi8> // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32 // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32 // CHECK: call i32 @llvm.nvvm.idp2a.s.u(i32 %[[a_cast]], i32 %[[b_cast]], i1 false, i32 %{{.*}}) - %2 = nvvm.dot.accumulate.2way %a , %b , %c {b_hi = false}: vector<2xi16>, vector<4xi8> + %2 = nvvm.dot.accumulate.2way %a , %b , %c <{b_hi = false}>: vector<2xi16>, vector<4xi8> // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32 // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32 // CHECK: call i32 @llvm.nvvm.idp2a.s.u(i32 %[[a_cast]], i32 %[[b_cast]], i1 true, i32 %{{.*}}) - %3 = nvvm.dot.accumulate.2way %a , %b , %c {b_hi = true}: vector<2xi16>, vector<4xi8> + %3 = nvvm.dot.accumulate.2way %a , %b , %c <{b_hi = true}>: vector<2xi16>, vector<4xi8> // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32 // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32 // CHECK: call i32 @llvm.nvvm.idp2a.u.s(i32 %[[a_cast]], i32 %[[b_cast]], i1 false, i32 %{{.*}}) - %4 = nvvm.dot.accumulate.2way %a , %b , %c {b_hi = false}: vector<2xi16>, vector<4xi8> + %4 = nvvm.dot.accumulate.2way %a , %b , %c <{b_hi = false}>: vector<2xi16>, vector<4xi8> // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32 // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32 // CHECK: call i32 @llvm.nvvm.idp2a.u.s(i32 %[[a_cast]], i32 %[[b_cast]], i1 true, i32 %{{.*}}) - %5 = nvvm.dot.accumulate.2way %a , %b , %c {b_hi = true}: vector<2xi16>, vector<4xi8> + %5 = nvvm.dot.accumulate.2way %a , %b , %c <{b_hi = true}>: vector<2xi16>, vector<4xi8> // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32 // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32 // CHECK: call i32 @llvm.nvvm.idp2a.s.s(i32 %[[a_cast]], i32 %[[b_cast]], i1 false, i32 %{{.*}}) - %6 = nvvm.dot.accumulate.2way %a , %b , %c {b_hi = false}: vector<2xi16>, vector<4xi8> + %6 = nvvm.dot.accumulate.2way %a , %b , %c <{b_hi = false}>: vector<2xi16>, vector<4xi8> // CHECK: %[[a_cast:.*]] = bitcast <2 x i16> %{{.*}} to i32 // CHECK: %[[b_cast:.*]] = bitcast <4 x i8> %{{.*}} to i32 // CHECK: call i32 @llvm.nvvm.idp2a.s.s(i32 %[[a_cast]], i32 %[[b_cast]], i1 true, i32 %{{.*}}) - %7 = nvvm.dot.accumulate.2way %a , %b , %c {b_hi = true}: vector<2xi16>, vector<4xi8> + %7 = nvvm.dot.accumulate.2way %a , %b , %c <{b_hi = true}>: vector<2xi16>, vector<4xi8> llvm.return } diff --git a/mlir/test/python/dialects/nvvm.py b/mlir/test/python/dialects/nvvm.py index d727a39e956e0..28663e784ac00 100644 --- a/mlir/test/python/dialects/nvvm.py +++ b/mlir/test/python/dialects/nvvm.py @@ -173,11 +173,11 @@ def barriers(mask, vi32, vf32): # CHECK: nvvm.barrier # CHECK: nvvm.bar.warp.sync %[[ARG0]] : i32 # CHECK: nvvm.cluster.arrive -# CHECK: nvvm.cluster.arrive {aligned} +# CHECK: nvvm.cluster.arrive aligned # CHECK: nvvm.cluster.arrive.relaxed -# CHECK: nvvm.cluster.arrive.relaxed {aligned} +# CHECK: nvvm.cluster.arrive.relaxed aligned # CHECK: nvvm.cluster.wait -# CHECK: nvvm.cluster.wait {aligned} +# CHECK: nvvm.cluster.wait aligned # CHECK: nvvm.fence.mbarrier.init # CHECK: nvvm.bar.warp.sync %[[ARG0]] : i32 # CHECK: return %[[BARRIER_3]] : i32 @@ -346,8 +346,8 @@ def reductions(mask, vi32, vf32): # CHECK: %[[REDUX_4:.*]] = nvvm.redux.sync umax %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_5:.*]] = nvvm.redux.sync umin %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_6:.*]] = nvvm.redux.sync xor %[[ARG1]], %[[ARG1]] : i32 -> i32 -# CHECK: %[[REDUX_7:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] {abs = true, nan = true} : f32 -> f32 -# CHECK: %[[REDUX_8:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] {abs = true, nan = true} : f32 -> f32 +# CHECK: %[[REDUX_7:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] abs = true nan = true : f32 -> f32 +# CHECK: %[[REDUX_8:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] abs = true nan = true : f32 -> f32 # CHECK: %[[REDUX_9:.*]] = nvvm.redux.sync and %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_10:.*]] = nvvm.redux.sync max %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_11:.*]] = nvvm.redux.sync min %[[ARG1]], %[[ARG1]] : i32 -> i32 @@ -355,8 +355,8 @@ def reductions(mask, vi32, vf32): # CHECK: %[[REDUX_13:.*]] = nvvm.redux.sync umax %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_14:.*]] = nvvm.redux.sync umin %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_15:.*]] = nvvm.redux.sync xor %[[ARG1]], %[[ARG1]] : i32 -> i32 -# CHECK: %[[REDUX_16:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] {abs = true} : f32 -> f32 -# CHECK: %[[REDUX_17:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] {abs = true} : f32 -> f32 +# CHECK: %[[REDUX_16:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] abs = true : f32 -> f32 +# CHECK: %[[REDUX_17:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] abs = true : f32 -> f32 # CHECK: %[[REDUX_18:.*]] = nvvm.redux.sync and %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_19:.*]] = nvvm.redux.sync max %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_20:.*]] = nvvm.redux.sync min %[[ARG1]], %[[ARG1]] : i32 -> i32 @@ -364,8 +364,8 @@ def reductions(mask, vi32, vf32): # CHECK: %[[REDUX_22:.*]] = nvvm.redux.sync umax %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_23:.*]] = nvvm.redux.sync umin %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_24:.*]] = nvvm.redux.sync xor %[[ARG1]], %[[ARG1]] : i32 -> i32 -# CHECK: %[[REDUX_25:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] {nan = true} : f32 -> f32 -# CHECK: %[[REDUX_26:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] {nan = true} : f32 -> f32 +# CHECK: %[[REDUX_25:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] nan = true : f32 -> f32 +# CHECK: %[[REDUX_26:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] nan = true : f32 -> f32 # CHECK: %[[REDUX_27:.*]] = nvvm.redux.sync and %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_28:.*]] = nvvm.redux.sync max %[[ARG1]], %[[ARG1]] : i32 -> i32 # CHECK: %[[REDUX_29:.*]] = nvvm.redux.sync min %[[ARG1]], %[[ARG1]] : i32 -> i32