diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index a366db1c580ba..7545dfa3f3fff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -586,6 +586,12 @@ def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
   "Use true 16-bit registers"
 >;
 
+def Feature16bitD16HWBug : SubtargetFeature<"d16-hw-bug",
+  "Enable16bitD16HWBug",
+  "true",
+  "Disable D16 for 16 bit data type for true16 mode"
+>;
+
 def FeatureBF16TransInsts : SubtargetFeature<"bf16-trans-insts",
   "HasBF16TransInsts",
   "true",
@@ -1934,7 +1940,9 @@ def FeatureISAVersion11_Common : FeatureSet<
    FeaturePackedTID,
    FeatureVcmpxPermlaneHazard,
    FeatureMemoryAtomicFAddF32DenormalSupport,
-   FeatureRealTrue16Insts]>;
+   FeatureRealTrue16Insts,
+   Feature16bitD16HWBug,
+]>;
 
 // There are few workarounds that need to be
 // added to all targets. This pessimizes codegen
@@ -2570,6 +2578,15 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
   // FIXME When we default to RealTrue16 instead of Fake, change the line as follows.
   // AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;
 
+// Do not use D16 instructions for 16-bit data types on subtargets that have
+// the 16-bit D16 hardware bug. The two predicates below split real-true16
+// subtargets into those with the bug and those without it.
+def Has16bitD16HWBug : Predicate<"Subtarget->has16bitD16HWBug()">,
+  AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, Feature16bitD16HWBug)>;
+def NotHas16bitD16HWBug : Predicate<"Subtarget->useRealTrue16Insts() && "
+                                    "!Subtarget->has16bitD16HWBug()">,
+  AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, (not Feature16bitD16HWBug))>;
+
 def HasBF16TransInsts : Predicate<"Subtarget->hasBF16TransInsts()">,
   AssemblerPredicate<(all_of FeatureBF16TransInsts)>;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 73acb1ddbd2a7..521cd208f5326 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -38,6 +38,12 @@ bool AMDGPUSubtarget::useRealTrue16Insts() const {
   return hasTrue16BitInsts() && EnableRealTrue16Insts;
 }
 
+// True only when real true16 instructions are in use and the subtarget sets
+// Enable16bitD16HWBug. useRealTrue16Insts() already checks hasTrue16BitInsts().
+bool AMDGPUSubtarget::has16bitD16HWBug() const {
+  return useRealTrue16Insts() && Enable16bitD16HWBug;
+}
+
 // Returns the maximum per-workgroup LDS allocation size (in bytes) that still
 // allows the given function to achieve an occupancy of NWaves waves per
 // SIMD / EU, taking into account only the function's *maximum* workgroup size.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 57b757c990e1a..e5203486436e4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -59,6 +59,7 @@ class AMDGPUSubtarget { bool HasCvtPkF16F32Inst = false; bool HasF32ToF16BF16ConversionSRInsts = false; bool EnableRealTrue16Insts = false; + bool Enable16bitD16HWBug = false; bool HasBF16TransInsts = false; bool HasBF16ConversionInsts = false; bool HasBF16PackedInsts = false; @@ -224,6 +225,8 @@ class AMDGPUSubtarget { // supported and the support for fake True16 instructions is removed. bool useRealTrue16Insts() const; + bool has16bitD16HWBug() const; + bool hasBF16TransInsts() const { return HasBF16TransInsts; } bool hasBF16ConversionInsts() const { diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index a1306565bbe29..3f31cde5daf28 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1378,13 +1378,19 @@ let SubtargetPredicate = HasVmemPrefInsts in { } //===----------------------------------------------------------------------===// -// Flat Patterns +// Utilities //===----------------------------------------------------------------------===// +class Mem_wrap { + dag ret = !if(true16, (EXTRACT_SUBREG op, lo16), op); +} +//===----------------------------------------------------------------------===// +// Flat Patterns +//===----------------------------------------------------------------------===// // Patterns for global loads with no offset. 
-class FlatLoadPat : GCNPat < +class FlatLoadPat : GCNPat < (vt (node (FlatOffset i64:$vaddr, i32:$offset))), - (inst $vaddr, $offset) + Mem_wrap<(inst $vaddr, $offset), true16>.ret >; class FlatLoadPat_CPOL : GCNPat < @@ -1462,14 +1468,14 @@ class GlobalLoadSaddrPat_D16_t16 ; -class FlatLoadSignedPat : GCNPat < +class FlatLoadSignedPat : GCNPat < (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))), - (inst $vaddr, $offset) + Mem_wrap<(inst $vaddr, $offset), true16>.ret >; -class FlatLoadSaddrPat : GCNPat < +class FlatLoadSaddrPat : GCNPat < (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol))), - (inst $saddr, $voffset, $offset, $cpol) + Mem_wrap<(inst $saddr, $voffset, $offset, $cpol), true16>.ret >; class FlatLoadSignedPat_M0 : GCNPat < @@ -1629,9 +1635,9 @@ multiclass FlatSignedAtomicPat (inst), noRtnNode, vt, data_vt>; } -class ScratchLoadSignedPat : GCNPat < +class ScratchLoadSignedPat : GCNPat < (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))), - (inst $vaddr, $offset) + Mem_wrap<(inst $vaddr, $offset), true16>.ret >; class ScratchLoadSignedPat_D16 : GCNPat < @@ -1649,9 +1655,9 @@ class ScratchStoreSignedPat .ret:$data, $vaddr, $offset) >; -class ScratchLoadSaddrPat : GCNPat < +class ScratchLoadSaddrPat : GCNPat < (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))), - (inst $saddr, $offset) + Mem_wrap<(inst $saddr, $offset), true16>.ret >; class ScratchLoadSaddrPat_D16 : GCNPat < @@ -1670,9 +1676,9 @@ class ScratchStoreSaddrPat .ret:$data, $saddr, $offset) >; -class ScratchLoadSVaddrPat : GCNPat < +class ScratchLoadSVaddrPat : GCNPat < (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset, CPol:$cpol))), - (inst $vaddr, $saddr, $offset, $cpol) + Mem_wrap<(inst $vaddr, $saddr, $offset, $cpol), true16>.ret >; class ScratchStoreSVaddrPat { } } -multiclass GlobalFLATLoadPats { - def : FlatLoadSignedPat { +multiclass GlobalFLATLoadPats { + def : FlatLoadSignedPat { 
let AddedComplexity = 10; let SubtargetPredicate = inst.SubtargetPredicate; let OtherPredicates = inst.OtherPredicates; } - def : FlatLoadSaddrPat(!cast(inst)#"_SADDR"), node, vt> { + def : FlatLoadSaddrPat(!cast(inst)#"_SADDR"), node, vt, isTrue16> { let AddedComplexity = 11; let SubtargetPredicate = inst.SubtargetPredicate; let OtherPredicates = inst.OtherPredicates; @@ -1860,16 +1866,16 @@ multiclass GlobalFLATAtomicIntrPats; } -multiclass ScratchFLATLoadPats { - def : ScratchLoadSignedPat { +multiclass ScratchFLATLoadPats { + def : ScratchLoadSignedPat { let AddedComplexity = 25; } - def : ScratchLoadSaddrPat(!cast(inst)#"_SADDR"), node, vt> { + def : ScratchLoadSaddrPat(!cast(inst)#"_SADDR"), node, vt, isTrue16> { let AddedComplexity = 26; } - def : ScratchLoadSVaddrPat(!cast(inst)#"_SVS"), node, vt> { + def : ScratchLoadSVaddrPat(!cast(inst)#"_SVS"), node, vt, isTrue16> { let SubtargetPredicate = HasFlatScratchSVSMode; let AddedComplexity = 27; } @@ -1937,12 +1943,12 @@ multiclass ScratchFLATLoadPats_D16_t16 { - def : FlatLoadPat { +multiclass FlatLoadPats { + def : FlatLoadPat { let OtherPredicates = [HasFlatAddressSpace]; } - def : FlatLoadSaddrPat(!cast(inst)#"_SADDR"), node, vt> { + def : FlatLoadSaddrPat(!cast(inst)#"_SADDR"), node, vt, isTrue16> { let AddedComplexity = 9; let SubtargetPredicate = HasFlatGVSMode; } @@ -2018,6 +2024,13 @@ let True16Predicate = p in { } let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in { + defm : FlatStorePats_t16 ; + defm : FlatStorePats_t16 ; + def : FlatStorePat ; + def : FlatStorePat ; +} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts + +let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in { defm : FlatLoadPats_D16_t16; defm : FlatLoadPats_D16_t16; defm : FlatLoadPats_D16_t16; @@ -2026,11 +2039,18 @@ let OtherPredicates 
= [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi defm : FlatLoadPats_D16_t16; defm : FlatLoadPats_D16_t16; defm : FlatLoadPats_D16_t16; - defm : FlatStorePats_t16 ; - defm : FlatStorePats_t16 ; - def : FlatStorePat ; - def : FlatStorePat ; -} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts +} + +let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in { + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; +} defm : FlatLoadPats ; defm : FlatLoadPats ; @@ -2161,22 +2181,37 @@ defm : GlobalFLATLoadPats ; } -let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in { -defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>; -defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>; -defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>; -defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>; -defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>; -defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>; -defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>; -defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>; -defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>; -defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>; -defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>; -defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>; -defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", 
atomic_store_16_global, i16>; +let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in { + defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>; + defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>; + defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", atomic_store_8_global, i16>; + defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", atomic_store_16_global, i16>; } // end OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts +let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, NotHas16bitD16HWBug], True16Predicate = UseRealTrue16Insts in { + defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>; + defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>; + defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>; + defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>; + defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_aext_8_global, i16>; + defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", atomic_load_zext_8_global, i16>; + defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", atomic_load_sext_8_global, i16>; + defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_nonext_16_global, i16>; + defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", atomic_load_zext_16_global, i16>; +} + +let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits, Has16bitD16HWBug], True16Predicate = UseRealTrue16Insts in { + defm : GlobalFLATLoadPats ; + defm : GlobalFLATLoadPats ; + defm : GlobalFLATLoadPats ; + defm : GlobalFLATLoadPats ; + defm : GlobalFLATLoadPats ; + defm : GlobalFLATLoadPats ; + defm : GlobalFLATLoadPats ; + defm : GlobalFLATLoadPats ; + defm : GlobalFLATLoadPats ; +} + foreach vt = Reg32Types.types in { defm : 
GlobalFLATLoadPats ; defm : GlobalFLATStorePats ; @@ -2386,12 +2421,20 @@ defm : ScratchFLATStorePats ; } let True16Predicate = UseRealTrue16Insts in { -defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>; -defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>; -defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>; -defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>; -defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>; -defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>; + let OtherPredicates = [NotHas16bitD16HWBug] in { + defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", extloadi8_private, i16>; + defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_UBYTE_D16", zextloadi8_private, i16>; + defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SBYTE_D16", sextloadi8_private, i16>; + defm : ScratchFLATLoadPats_D16_t16<"SCRATCH_LOAD_SHORT_D16", load_private, i16>; + } + let OtherPredicates = [Has16bitD16HWBug] in { + defm : ScratchFLATLoadPats ; + defm : ScratchFLATLoadPats ; + defm : ScratchFLATLoadPats ; + defm : ScratchFLATLoadPats ; + } + defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_SHORT", store_private, i16>; + defm : ScratchFLATStorePats_t16 <"SCRATCH_STORE_BYTE", truncstorei8_private, i16>; } // End True16Predicate = UseRealTrue16Insts foreach vt = Reg32Types.types in { diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll index 46b82d3a3d651..71a198ce49814 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll @@ -15369,876 +15369,913 @@ define <32 x i32> @bitcast_v128i8_to_v32i32(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v31, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v51, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v52, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v53, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v54, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v54, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v55, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:272 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 offset:260 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:536 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:532 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:528 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:524 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:432 +; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v91, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:412 +; GFX11-TRUE16-NEXT: s_clause 0x4 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:392 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v49, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v114, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v81, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v83, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v83, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v85, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v87, off, s32 offset:48 
-; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v87, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v97, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v98, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v100, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v102, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v160, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v160, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v161, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v161, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:152 +; GFX11-TRUE16-NEXT: scratch_load_u16 v31, off, s32 offset:384 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:380 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 offset:376 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:372 +; GFX11-TRUE16-NEXT: scratch_load_u16 v56, off, s32 offset:368 +; GFX11-TRUE16-NEXT: scratch_load_u16 v114, off, s32 offset:364 +; GFX11-TRUE16-NEXT: scratch_load_u16 v33, off, s32 offset:360 +; GFX11-TRUE16-NEXT: scratch_load_u16 v115, off, s32 offset:356 +; GFX11-TRUE16-NEXT: scratch_load_u16 v57, off, s32 offset:352 +; GFX11-TRUE16-NEXT: scratch_load_u16 v116, off, s32 offset:348 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:344 +; GFX11-TRUE16-NEXT: scratch_load_u16 v117, off, s32 offset:340 +; GFX11-TRUE16-NEXT: scratch_load_u16 v58, off, s32 offset:336 +; GFX11-TRUE16-NEXT: scratch_load_u16 v118, off, s32 offset:332 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:328 +; GFX11-TRUE16-NEXT: scratch_load_u16 v119, off, s32 offset:324 +; GFX11-TRUE16-NEXT: scratch_load_u16 v59, off, s32 
offset:320 +; GFX11-TRUE16-NEXT: scratch_load_u16 v128, off, s32 offset:316 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:312 +; GFX11-TRUE16-NEXT: scratch_load_u16 v129, off, s32 offset:308 +; GFX11-TRUE16-NEXT: scratch_load_u16 v60, off, s32 offset:304 +; GFX11-TRUE16-NEXT: scratch_load_u16 v130, off, s32 offset:300 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:296 +; GFX11-TRUE16-NEXT: scratch_load_u16 v131, off, s32 offset:292 +; GFX11-TRUE16-NEXT: scratch_load_u16 v61, off, s32 offset:288 +; GFX11-TRUE16-NEXT: scratch_load_u16 v132, off, s32 offset:284 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:280 +; GFX11-TRUE16-NEXT: scratch_load_u16 v133, off, s32 offset:276 +; GFX11-TRUE16-NEXT: scratch_load_u16 v62, off, s32 offset:272 +; GFX11-TRUE16-NEXT: scratch_load_u16 v134, off, s32 offset:268 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:264 +; GFX11-TRUE16-NEXT: scratch_load_u16 v135, off, s32 offset:260 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v162, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v163, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v164, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v165, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v55, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v65, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:164 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v80, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v81, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v82, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v84, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v86, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v96, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v99, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v101, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v103, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v112, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v113, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v63, off, s32 offset:256 +; GFX11-TRUE16-NEXT: scratch_load_u16 v144, off, s32 offset:252 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:248 +; GFX11-TRUE16-NEXT: scratch_load_u16 v145, off, s32 offset:244 +; GFX11-TRUE16-NEXT: scratch_load_u16 v72, off, s32 offset:240 +; GFX11-TRUE16-NEXT: scratch_load_u16 v146, off, s32 offset:236 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:232 +; GFX11-TRUE16-NEXT: scratch_load_u16 v147, off, s32 offset:228 +; GFX11-TRUE16-NEXT: scratch_load_u16 v73, off, s32 offset:224 +; GFX11-TRUE16-NEXT: scratch_load_u16 v148, off, s32 offset:220 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:216 +; GFX11-TRUE16-NEXT: scratch_load_b32 v51, 
off, s32 offset:388 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v74, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v75, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v76, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v77, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_u16 v78, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v79, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v88, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v89, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v90, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v91, off, s32 offset:136 +; GFX11-TRUE16-NEXT: scratch_load_u16 v92, off, s32 offset:144 +; GFX11-TRUE16-NEXT: scratch_load_u16 v93, off, s32 offset:152 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_u16 v94, off, s32 offset:160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v95, off, s32 offset:168 +; GFX11-TRUE16-NEXT: scratch_load_u16 v104, off, s32 offset:176 +; GFX11-TRUE16-NEXT: scratch_load_u16 v105, off, s32 offset:184 +; GFX11-TRUE16-NEXT: scratch_load_u16 v106, off, s32 offset:192 +; GFX11-TRUE16-NEXT: scratch_load_u16 v107, off, s32 offset:200 +; GFX11-TRUE16-NEXT: scratch_load_u16 v108, off, s32 offset:208 +; GFX11-TRUE16-NEXT: scratch_load_u16 v149, off, s32 offset:212 +; GFX11-TRUE16-NEXT: scratch_load_u16 v150, off, s32 offset:204 +; GFX11-TRUE16-NEXT: scratch_load_u16 v151, off, s32 offset:196 +; GFX11-TRUE16-NEXT: scratch_load_u16 
v160, off, s32 offset:188 +; GFX11-TRUE16-NEXT: scratch_load_u16 v161, off, s32 offset:180 +; GFX11-TRUE16-NEXT: scratch_load_u16 v162, off, s32 offset:172 +; GFX11-TRUE16-NEXT: scratch_load_u16 v163, off, s32 offset:164 +; GFX11-TRUE16-NEXT: scratch_load_u16 v164, off, s32 offset:156 +; GFX11-TRUE16-NEXT: scratch_load_u16 v165, off, s32 offset:148 +; GFX11-TRUE16-NEXT: scratch_load_u16 v166, off, s32 offset:140 +; GFX11-TRUE16-NEXT: scratch_load_u16 v167, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v176, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v177, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v178, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v179, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v180, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v181, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v182, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v183, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v40, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v41, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v42, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v43, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v44, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v45, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v115, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v117.l, v30.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v118.h, v28.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v119.l, v26.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v119.h, v24.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v130.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v130.h, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v131.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v132.h, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.l, v14.l -; 
GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.h, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v144.l, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v145.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v146.h, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v146.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.l, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.l, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.h, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.h, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.l, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.h, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.l, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v145.l, 8, v19.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.l, 8, v21.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.h, 8, v23.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.l, 8, v25.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.h, 8, v27.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.h, 8, v29.l +; GFX11-TRUE16-NEXT: scratch_load_u16 v46, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v47, off, s32 offset:4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.l, v30.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v70.h, v28.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v71.l, v26.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v71.h, v24.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v82.l, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v82.h, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v83.l, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v84.h, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v86.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v86.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v96.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v97.h, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v98.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v98.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.l, v2.l +; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v19.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v21.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v23.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v25.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v27.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v29.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v31.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v32.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v56.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v33.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v57.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v34.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v58.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v35.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.h, 8, v59.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.l, 8, v36.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.h, 8, v60.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v37.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v61.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v38.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v62.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, 
v63.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v48.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.h, 8, v72.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v49.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v73.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v50.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v114 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v51 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.l, 8, v81.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.l, 8, v52.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.l, 8, v82.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v53.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.h, 8, v83.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v54.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.l, 8, v83.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.l, 8, v55.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.h, 8, v85.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.h, 8, v64.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.h, 8, v85.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.h, 8, v65.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.l, 8, v87.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v66.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.l, 8, v87.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v67.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.h, 8, v97.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.h, 8, v74.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.l, 8, v97.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.l, 8, v75.l ; GFX11-TRUE16-NEXT: s_waitcnt 
vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.h, 8, v98.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v76.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.h, 8, v99.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v77.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.l, 8, v100.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v78.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v101.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.l, 8, v79.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v102.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.h, 8, v88.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(38) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v160.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v89.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.l, 8, v160.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v90.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(36) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v161.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v91.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v161.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v92.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(34) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v162.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v93.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v162.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v94.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v163.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v95.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v163.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v104.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(30) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v164.l +; GFX11-TRUE16-NEXT: 
v_lshlrev_b16 v52.l, 8, v105.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v164.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v106.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(28) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v165.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v107.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v165.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v71.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.l, 8, v71.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.l, 8, v70.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.h, 8, v70.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v68.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.h, 8, v67.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v66.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v66.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v64.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v64.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v55.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v54.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v54.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v53.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v53.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v52.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v52.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v51.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v51.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v50.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v31.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v108.l ; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: 
s_xor_b32 s0, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB14_3 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow -; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB14_4 -; GFX11-TRUE16-NEXT: .LBB14_2: ; %end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB14_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v149.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.h, 0 -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v146.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v146.l -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v0.l, v151.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v151.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v0.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v150.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v3.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v145.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v144.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v149, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v1.h, v150.l -; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v132.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v131.l -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v148.h -; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v130.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v149, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v2.l, v148.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v2.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v134.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v134.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v145.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v130.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v149, v2 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v3.l, v147.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v147.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v3.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v135.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v119.h -; 
GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v119.l -; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v118.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v149, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v4.l, v144.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v4.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v133.h -; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v117.l +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB14_2 +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v101.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.h, 0 +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v98.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v98.l +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v0.l, v103.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v103.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v102.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v97.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v96.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v101, v0 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v1.h, v102.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v84.h +; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v83.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v100.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v82.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v101, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v2.l, v100.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v86.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v86.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v97.l +; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v82.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v101, v2 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v3.l, v99.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v99.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v87.h +; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v71.h +; 
GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v70.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v101, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v4.l, v96.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v85.h +; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v69.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v115.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v115.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v149, v4 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v5.l, v135.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v5.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v132.l -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v129.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v113.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v112.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v149, v5 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v6.l, v133.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v6.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v103.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v128.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v103.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v101.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v149, v6 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v7.l, v131.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v7.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v118.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v100.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v99.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v98.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v149, v7 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v8.l, v129.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v8.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v116.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v114.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v96.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v96.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v149, v8 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v9.l, 
v128.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v9.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v86.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v113.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v86.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v84.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v149, v9 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v10.l, v117.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v10.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v102.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v84.l -; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v82.h -; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v81.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v149, v10 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v11.l, v116.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v11.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v101.l -; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v99.l -; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v80.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v80.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v149, v11 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v12.l, v114.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v12.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v69.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v97.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v69.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v68.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v149, v12 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v13.l, v112.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v13.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v87.h -; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v67.l -; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v65.h -; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v65.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v149, v13 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v14.l, v102.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v14.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v85.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v83.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v55.h -; 
GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v50.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v149, v14 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v15.l, v100.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v15.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v82.l -; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v149, v15 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v16.l, v98.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v16.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v71.h -; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v48.l -; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v39.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v149, v16 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v17.l, v97.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v17.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v70.h -; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v68.l -; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v18.l, v87.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v18.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v66.h -; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v149, v18 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v19.l, v85.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v19.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v149, v19 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v20.l, v83.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v20.l, v149.h -; 
GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v55.l -; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v54.l -; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v149, v20 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v21.l, v81.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v21.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v53.l -; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v149, v21 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v22.l, v71.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v22.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v52.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr99_hi16 +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v47.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v46.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v101, v4 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v5.l, v87.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v84.l +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v81.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v45.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v44.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v101, v5 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v6.l, v85.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v43.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v42.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v41.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v101, v6 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v7.l, v83.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v70.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v40.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v183.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v182.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v101, v7 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v8.l, v81.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v68.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v67.h +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v181.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v180.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v101, v8 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v9.l, v80.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v179.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v66.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v178.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v177.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v101, v9 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v10.l, v69.h +; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v10.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v65.h +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v176.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v167.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v166.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v101, v10 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v11.l, v68.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v64.h +; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v55.h +; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v165.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v164.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v101, v11 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v12.l, v67.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v163.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v54.h +; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v162.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v161.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v101, v12 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v13.l, v66.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v53.h +; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v160.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v151.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v150.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v101, v13 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v14.l, v65.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v52.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v51.h +; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v149.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v148.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v101, v14 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v15.l, v64.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v147.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v50.h +; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v146.l +; GFX11-TRUE16-NEXT: 
v_and_b16 v23.l, 0xff, v145.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v101, v15 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v16.l, v55.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v49.h +; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v144.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v135.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v134.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v101, v16 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v17.l, v54.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v48.h +; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v133.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v132.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v101, v17 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v18.l, v53.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v131.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v38.h +; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v130.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v129.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v101, v18 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v19.l, v52.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v37.h +; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v128.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v119.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v118.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v101, v19 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v20.l, v51.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v35.h +; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v117.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v116.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v101, v20 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v21.l, v50.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v101.h +; GFX11-TRUE16-NEXT: 
v_and_b16 v30.l, 0xff, v115.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v34.h +; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v114.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v113.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v101, v21 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v22.l, v49.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v112.l ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_lo16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr160 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr161 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr162 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr163 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr164 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr165 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr167 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr176 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr177 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr178 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr179 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr180 +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr181 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr182 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr183 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr40 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr41 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr42 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr43 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr44 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr46 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr47 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v149, v22 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v23.l, v70.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v23.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v51.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v101, v22 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v23.l, v48.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v32.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v149, v23 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v24.l, v67.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v24.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v149, v24 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v25.l, v66.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v25.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v101, v23 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v24.l, v39.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v101, v24 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v25.l, v38.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v149, v25 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v26.l, v64.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v26.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v149, v26 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v27.l, v54.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v27.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v101, v25 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v26.l, v37.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v101.h +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v101, v26 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v27.l, v36.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v149, v27 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v28.l, v53.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v28.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v149, v28 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v29.l, v52.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v29.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v101, v27 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v28.l, v35.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v101, v28 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v29.l, v34.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v149, v29 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v30.l, v51.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v149, v30 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v31.l, v50.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v31.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v101, v29 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v30.l, v33.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v101, v30 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v31.l, v32.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 
v31.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v149, v31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v101, v31 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 +; GFX11-TRUE16-NEXT: .LBB14_2: ; %Flow ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB14_2 -; GFX11-TRUE16-NEXT: .LBB14_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v149.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v149.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v146.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v146.l, 3 +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB14_4 +; GFX11-TRUE16-NEXT: ; %bb.3: ; %cmp.true +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v101.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v101.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v98.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v98.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v151.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v151.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v150.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v150.l, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v145.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v103.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v103.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v102.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v102.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v97.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v144.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v96.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v134.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v86.h, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v31, v3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v134.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v86.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v148.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v100.l, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v148.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v100.h, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v31, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v31.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v3.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v147.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v147.h, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v132.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v131.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v99.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v99.h, v2.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v84.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v83.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v31, v5 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v4.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v130.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v130.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v82.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v82.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v31.h -; 
GFX11-TRUE16-NEXT: v_or_b16 v5.l, v144.h, v3.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v145.l, v3.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v96.h, v3.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v97.l, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v4.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v31, v6 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v135.l, v4.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v135.h, v4.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v119.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v119.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v87.l, v4.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v87.h, v4.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v71.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v71.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v31, v7 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v6.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v118.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v70.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v69.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v133.l, v5.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v133.h, v5.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v85.l, v5.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v85.h, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v6.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v31, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v131.h, v6.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v132.l, v6.h +; GFX11-TRUE16-NEXT: 
v_or_b16 v7.l, v83.h, v6.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v84.l, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v115.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v47.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v46.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v31, v9 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v113.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v45.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v44.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v129.l, v7.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v129.h, v7.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v81.l, v7.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v81.h, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v8.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v31, v10 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v128.l, v8.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v128.h, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v103.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v103.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v80.l, v8.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v80.h, v8.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v43.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v42.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v31, v11 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: 
v_add_nc_u16 v10.l, v101.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v100.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v41.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v40.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v117.h, v9.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v118.l, v9.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v69.h, v9.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v70.l, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v10.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v31, v12 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v116.l, v10.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v116.h, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v99.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v98.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v68.l, v10.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v68.h, v10.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v183.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v182.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v31, v13 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v96.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v96.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v181.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v180.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v114.l, v11.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v114.h, v11.h +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v67.l, v11.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v67.h, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v12.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 
v11, v31, v14 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v112.h, v12.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v113.l, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v86.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v86.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v66.l, v12.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v66.h, v12.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v179.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v178.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v31, v15 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v84.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v84.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v177.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v176.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v102.l, v13.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v102.h, v13.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v65.l, v13.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v65.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v14.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v31, v16 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v15.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v100.h, v14.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v101.l, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v82.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v81.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v64.l, v14.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v64.h, v14.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v167.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 
v16.h, v166.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v31, v17 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v15.h ; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v16.l ; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v16.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v80.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v80.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v165.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v164.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v98.h, v15.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v99.l, v15.h +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v55.l, v15.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v55.h, v15.h ; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v16.l ; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v16.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v31, v18 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v17.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.h, 0x300, v17.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v97.l, v16.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v97.h, v16.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v69.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v69.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v54.l, v16.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v54.h, v16.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v163.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v162.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v31, v19 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v17.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, 0x300, v17.h ; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v18.l ; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v18.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v68.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v161.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v160.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v87.l, 
v17.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v87.h, v17.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v53.l, v17.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v53.h, v17.h ; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v18.l ; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v18.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v31, v20 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v19.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.h, 0x300, v19.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v85.l, v18.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v85.h, v18.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v65.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v65.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v52.l, v18.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v52.h, v18.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v151.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v150.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v31, v21 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v19.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, 0x300, v19.h ; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v20.l ; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v20.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v50.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v149.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v148.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v83.l, v19.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v83.h, v19.h +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v51.l, v19.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v51.h, v19.h ; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v20.l ; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v20.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v31, v22 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v21.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.h, 0x300, v21.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v81.h, v20.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v82.l, v20.h -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v50.l, v20.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v50.h, v20.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v147.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v146.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v31, v23 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v21.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, 0x300, v21.h ; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v22.l ; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v22.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v145.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v144.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v71.l, v21.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v71.h, v21.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v49.l, v21.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v49.h, v21.h ; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v22.l ; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v22.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v31, v24 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v23.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.h, 0x300, v23.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v70.l, v22.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v70.h, v22.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v39.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v39.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v48.l, v22.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v48.h, v22.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v135.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v134.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v31, v25 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v23.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, 0x300, v23.h ; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v24.h -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v133.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v132.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v67.h, v23.l -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v68.l, v23.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v39.l, v23.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v39.h, v23.h ; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v24.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v31, v26 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v25.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.h, 0x300, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v66.l, v24.l -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v66.h, v24.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v37.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v37.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v38.l, v24.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v38.h, v24.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v131.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v130.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v31, v27 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v25.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, 0x300, v25.h ; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v26.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v36.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v129.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v128.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v64.l, v25.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v64.h, v25.h +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v37.l, v25.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v37.h, v25.h ; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v26.h ; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v31, v28 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v27.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.h, 0x300, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v54.h, v26.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v55.l, v26.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v35.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v36.l, v26.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v36.h, v26.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v119.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v118.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v31, v29 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v27.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, 0x300, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v28.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v34.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v116.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v53.h, v27.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v54.l, v27.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v35.l, v27.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v35.h, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v28.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v31, v30 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v29.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v34.h, 0x300, v29.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v52.h, v28.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v53.l, v28.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v33.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v31, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v35.h, 0x300, v29.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v31.h +; 
GFX11-TRUE16-NEXT: v_or_b16 v29.l, v34.l, v28.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v34.h, v28.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v114.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v31, v35 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v29.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v29.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v34.h, 0x300, v29.h ; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v30.l ; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v30.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v51.h, v29.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v52.l, v29.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v113.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v33.l, v29.l +; GFX11-TRUE16-NEXT: v_or_b16 v33.h, v33.h, v29.h ; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v30.l ; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v30.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v31, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v32.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v32.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v50.h, v30.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v51.l, v30.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v31, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v33.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v33.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v32.l, v30.l +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v32.h, v30.h ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v31, v33 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v32.l @@ -16246,7 +16283,48 @@ define <32 x i32> @bitcast_v128i8_to_v32i32(<128 x i8> %a, i32 %b) { ; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v32.h, 0x300, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v31.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v32 +; GFX11-TRUE16-NEXT: .LBB14_4: ; %end ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:392 +; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 
offset:496 +; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:516 +; GFX11-TRUE16-NEXT: s_clause 0x4 +; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:524 +; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:528 +; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:532 +; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:536 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v32i32: @@ -20512,1887 +20590,946 @@ define inreg <32 x i32> @bitcast_v128i8_to_v32i32_scalar(<128 x i8> inreg %a, i3 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: bitcast_v128i8_to_v32i32_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v60, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 -; GFX11-TRUE16-NEXT: s_clause 0x7 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 -; GFX11-TRUE16-NEXT: 
v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v2, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v16, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v18, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v20, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v22, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v24, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v26, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v28, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v30, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v87, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v113, off, s32 offset:168 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v116, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v117, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v128, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v130, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v94, off, s32 offset:240 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v95, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v104, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v105, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v106, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v107, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v108, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v109, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v110, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v111, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v81, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 
v82, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v83, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v102, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v112, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v146, off, s32 offset:132 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v147, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v148, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v166, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v180, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v181, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v182, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v183, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v45, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v46, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v47, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v56, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v62, off, s32 offset:4 -; GFX11-TRUE16-NEXT: 
v_lshlrev_b32_e32 v89, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v90, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v91, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v92, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v93, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v76, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v77, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v78, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v79, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v88, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v63, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v72, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v73, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v74, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v75, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v57, 8, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v58, 8, v4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v59, 8, v6 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v60, 8, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v61, 8, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v40, 8, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v41, 8, v14 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v42, 8, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v43, 8, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v44, 8, v20 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v167, 8, v22 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v176, 8, v24 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v177, 8, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v178, 8, v28 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v179, 8, v30 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v149, 8, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v150, 8, v87 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v151, 8, v96 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v160, 8, v97 -; GFX11-TRUE16-NEXT: s_waitcnt 
vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v161, 8, v98 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v132, 8, v99 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v133, 8, v113 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v134, 8, v114 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v135, 8, v115 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v144, 8, v116 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v119, 8, v117 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v128, 8, v128 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v129, 8, v129 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v130, 8, v130 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v131, 8, v131 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v113, 8, v94 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v114, 8, v95 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v115, 8, v104 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v116, 8, v105 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v117, 8, v106 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 8, v107 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v96, 8, v108 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v97, 8, v109 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v98, 8, v110 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v99, 8, v111 -; GFX11-TRUE16-NEXT: 
s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB15_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v54 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v53 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v90 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v91 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v50 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v49 -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v76 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v77 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v39 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v88 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v63 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v72 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v73 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v33 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v74 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v75 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v62 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v57 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v58 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v56 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v47 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v60 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v46 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v45 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v61 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v41 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v42 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v43 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v166 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v165 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v167 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v176 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v164 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v163 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 
0xff, v162 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v148 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v179 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v149 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v147 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v150 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v151 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v160 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v161 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v112 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v103 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v132 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v133 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v102 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v101 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v134 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v135 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v144 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v119 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v85 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v84 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v128 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v129 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v130 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v81 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v80 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v113 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v114 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v115 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v116 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v68 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v0, v0, v117 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v87 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v67 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v96 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v97 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v65 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v98 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v99 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v55 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; 
GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s9, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s10, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v51 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v52 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v93 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v92 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v2, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: 
s_cbranch_vccnz .LBB15_3 -; GFX11-TRUE16-NEXT: .LBB15_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; 
GFX11-TRUE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v55 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v54 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v52 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v51 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v38 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v89, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v90, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v92, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v93, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v88, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v74, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v75, v12 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: 
s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v3, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v49 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v48 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v39 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v37 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 3, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v46 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 3, v181 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v77, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v78, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v79, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v63, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v72, v8 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v73, v10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v61, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v43, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v44, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v9, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v14, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v32 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v62 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v56 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v45 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 3, v183 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 3, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v13 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v16, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v162 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 3, v145 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v118 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v57, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v58, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v59, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v60, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v40, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v41, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v42, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xff, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v179, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v160, v21 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v161, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v18, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; 
GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v18 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v19, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v166 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v165 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v164 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v163 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v148 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 3, v147 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v146 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xff, v20 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v21, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v100 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 3, v83 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v167, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v176, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v177, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v178, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v150, v18 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v151, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v144, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v130, v26 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v131, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v19, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v24, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v112 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v103 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v102 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v101 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v86 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 3, v85 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 3, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v23, 0xff, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v25, 0xff, v25 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v26, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v69 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v132, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v133, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v134, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v135, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v119, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v128, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v129, v25 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v117, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xffff, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v24, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v29, v25 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v81 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v80 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v71 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v70 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xffff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v68 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 3, v67 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 3, v66 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 3, v65 -; 
GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 3, v64 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v28 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xff, v31 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xff, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v113, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v114, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v115, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v116, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v87, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v96, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v97, v30 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v98, v31 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v32, v99, v32 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v33, 16, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v28 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v32, 16, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v2, v3 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v29, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v34, v30 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB15_3: ; %end -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:416 -; GFX11-TRUE16-NEXT: 
scratch_load_b32 v62, off, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:444 -; GFX11-TRUE16-NEXT: s_clause 0x7 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:448 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:452 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:456 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:460 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:464 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:468 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:472 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:476 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB15_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: s_branch .LBB15_2 -; -; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v32i32_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, 
v46, s32 offset:452 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 -; GFX11-FAKE16-NEXT: s_clause 0x7 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 
offset:332 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-FAKE16-NEXT: scratch_load_u16 v2, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v16, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v18, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_u16 v20, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_u16 v22, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_u16 v24, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_u16 v26, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_u16 v28, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_u16 v30, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_u16 v31, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_u16 v87, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_u16 v96, off, s32 
offset:136 -; GFX11-FAKE16-NEXT: scratch_load_u16 v97, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_u16 v98, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_u16 v99, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_u16 v113, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_u16 v114, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_u16 v115, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_u16 v116, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_u16 v117, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_u16 v128, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_u16 v129, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_u16 v130, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_u16 v131, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_u16 v94, off, s32 offset:240 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_u16 v95, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_u16 v104, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_u16 v105, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_u16 v106, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_u16 v107, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_u16 v108, off, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_load_u16 v109, off, s32 offset:296 -; GFX11-FAKE16-NEXT: scratch_load_u16 v110, off, s32 offset:304 -; GFX11-FAKE16-NEXT: scratch_load_u16 v111, off, s32 offset:312 -; GFX11-FAKE16-NEXT: scratch_load_u16 v64, off, s32 offset:308 -; GFX11-FAKE16-NEXT: scratch_load_u16 v65, off, s32 offset:300 -; GFX11-FAKE16-NEXT: scratch_load_u16 v66, off, s32 offset:292 -; GFX11-FAKE16-NEXT: scratch_load_u16 v67, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_u16 v68, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_u16 v69, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_u16 v70, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_u16 v71, off, s32 offset:252 -; 
GFX11-FAKE16-NEXT: scratch_load_u16 v80, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_u16 v81, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_u16 v82, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_u16 v83, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_u16 v84, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_u16 v85, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_u16 v86, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_u16 v100, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_u16 v101, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_u16 v102, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_u16 v103, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_u16 v112, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_u16 v118, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_u16 v145, off, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_load_u16 v146, off, s32 offset:132 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v147, off, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_load_u16 v148, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_u16 v162, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_u16 v163, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_u16 v164, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_u16 v165, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_u16 v166, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_u16 v180, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_u16 v181, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v182, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_u16 v183, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v45, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v46, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v47, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v56, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 
v62, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v89, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v90, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v91, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v92, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v93, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v76, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v77, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v78, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v79, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v88, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v63, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v72, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v73, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v74, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v75, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v57, 8, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v58, 8, v4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v59, 8, v6 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v60, 8, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v61, 8, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v40, 8, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v41, 8, v14 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v42, 8, v16 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v43, 8, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v44, 8, v20 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v167, 8, v22 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v176, 8, v24 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v177, 8, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v178, 8, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v179, 8, v30 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v149, 8, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v150, 8, v87 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v151, 8, v96 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 
v160, 8, v97 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v161, 8, v98 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 8, v99 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v133, 8, v113 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 8, v114 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v135, 8, v115 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v144, 8, v116 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v119, 8, v117 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v128, 8, v128 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v129, 8, v129 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v130, 8, v130 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v131, 8, v131 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v113, 8, v94 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v114, 8, v95 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v115, 8, v104 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v116, 8, v105 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v117, 8, v106 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 8, v107 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v96, 8, v108 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 8, v109 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 8, v110 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-FAKE16-NEXT: 
v_lshlrev_b32_e32 v99, 8, v111 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB15_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v54 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v53 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v90 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v91 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v49 -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v76 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v77 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v88 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v63 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v72 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v73 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v33 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v74 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v75 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; 
GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v62 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v57 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v58 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v56 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v47 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v60 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v46 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v45 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v61 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v41 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v42 -; GFX11-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v43 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v166 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v165 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v167 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v176 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v164 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v163 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v177 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v178 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v0, v1 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v162 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v148 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v179 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v149 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v147 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v150 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v151 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v160 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v161 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v112 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v103 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v132 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v133 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v102 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v101 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v134 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v135 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v144 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v119 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v24, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v85 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v84 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v128 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v129 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v130 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v81 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v80 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v113 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v114 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v115 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v116 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v68 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v117 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v87 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v67 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v96 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v97 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v98 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v99 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v55 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; 
GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s9, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s10, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v51 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v52 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v93 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v92 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v2, v3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 
vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB15_3 -; GFX11-FAKE16-NEXT: .LBB15_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; 
GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v55 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v54 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v52 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v51 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v38 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v89, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v90, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v92, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v93, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v88, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v74, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v75, v12 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: 
v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v3, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v49 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v48 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v39 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 3, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v46 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 3, v181 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v77, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v78, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v79, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v63, v7 
-; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v72, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v73, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v61, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v43, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v44, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v9, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v14, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v32 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v62 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v56 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v45 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 3, v183 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 3, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; 
GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v16, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v162 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 3, v145 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v118 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v57, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v58, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v59, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v60, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v40, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v41, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v42, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xff, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v179, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v160, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v161, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v18, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v18 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v19, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v166 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v165 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v164 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v163 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v148 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 3, v147 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v146 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xff, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v21, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v100 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 3, v83 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v167, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v176, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v177, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v178, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v150, v18 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v151, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v144, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v130, v26 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v131, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, v19, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v24, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v112 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v103 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v102 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v101 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v86 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 3, v85 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 3, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xff, v23 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xff, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v26, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v69 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v132, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v133, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v134, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v135, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v119, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v128, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v129, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v117, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v24, v24, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v29, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v81 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v80 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v71 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v70 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v68 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 3, v67 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 3, v66 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 3, v65 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 3, v64 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xff, v28 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xff, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xff, v31 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v113, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v114, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v115, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v116, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v87, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v96, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v97, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v98, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, v99, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v33, 16, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v0, v1 -; 
GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, v29, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v34, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v31, v32 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB15_3: ; %end -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v111, off, s32 offset:320 -; GFX11-FAKE16-NEXT: scratch_load_b32 v110, off, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_load_b32 v109, off, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_load_b32 v108, off, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_load_b32 v107, off, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_load_b32 v106, off, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_load_b32 v105, off, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_load_b32 v104, off, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_load_b32 v95, off, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, 
s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:444 -; GFX11-FAKE16-NEXT: s_clause 0x7 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:448 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:452 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:456 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:460 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:464 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:468 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:472 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:476 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB15_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: s_branch .LBB15_2 +; GFX11-LABEL: bitcast_v128i8_to_v32i32_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-NEXT: 
scratch_store_b32 off, v47, s32 offset:448 +; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:352 +; GFX11-NEXT: s_clause 0x7 +; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:320 +; GFX11-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, 
v28 +; GFX11-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 +; GFX11-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 +; GFX11-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 +; GFX11-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 +; GFX11-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 +; GFX11-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 +; GFX11-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:316 +; GFX11-NEXT: scratch_load_u16 v2, off, s32 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v16, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v18, off, s32 offset:64 +; GFX11-NEXT: scratch_load_u16 v20, off, s32 offset:72 +; GFX11-NEXT: scratch_load_u16 v22, off, s32 offset:80 +; GFX11-NEXT: scratch_load_u16 v24, off, s32 offset:88 +; GFX11-NEXT: scratch_load_u16 v26, off, s32 offset:96 +; GFX11-NEXT: scratch_load_u16 v28, off, s32 offset:104 +; GFX11-NEXT: scratch_load_u16 v30, off, s32 offset:112 +; GFX11-NEXT: scratch_load_u16 v31, off, s32 offset:120 +; GFX11-NEXT: scratch_load_u16 v87, off, s32 offset:128 +; GFX11-NEXT: scratch_load_u16 v96, off, s32 offset:136 +; GFX11-NEXT: scratch_load_u16 v97, off, s32 offset:144 +; GFX11-NEXT: scratch_load_u16 v98, off, s32 offset:152 +; GFX11-NEXT: scratch_load_u16 v99, off, s32 offset:160 +; GFX11-NEXT: scratch_load_u16 v113, off, s32 offset:168 +; GFX11-NEXT: scratch_load_u16 v114, off, s32 offset:176 +; GFX11-NEXT: scratch_load_u16 v115, off, s32 offset:184 +; GFX11-NEXT: scratch_load_u16 v116, off, s32 offset:192 +; GFX11-NEXT: scratch_load_u16 v117, off, s32 
offset:200 +; GFX11-NEXT: scratch_load_u16 v128, off, s32 offset:208 +; GFX11-NEXT: scratch_load_u16 v129, off, s32 offset:216 +; GFX11-NEXT: scratch_load_u16 v130, off, s32 offset:224 +; GFX11-NEXT: scratch_load_u16 v131, off, s32 offset:232 +; GFX11-NEXT: scratch_load_u16 v94, off, s32 offset:240 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_u16 v95, off, s32 offset:248 +; GFX11-NEXT: scratch_load_u16 v104, off, s32 offset:256 +; GFX11-NEXT: scratch_load_u16 v105, off, s32 offset:264 +; GFX11-NEXT: scratch_load_u16 v106, off, s32 offset:272 +; GFX11-NEXT: scratch_load_u16 v107, off, s32 offset:280 +; GFX11-NEXT: scratch_load_u16 v108, off, s32 offset:288 +; GFX11-NEXT: scratch_load_u16 v109, off, s32 offset:296 +; GFX11-NEXT: scratch_load_u16 v110, off, s32 offset:304 +; GFX11-NEXT: scratch_load_u16 v111, off, s32 offset:312 +; GFX11-NEXT: scratch_load_u16 v64, off, s32 offset:308 +; GFX11-NEXT: scratch_load_u16 v65, off, s32 offset:300 +; GFX11-NEXT: scratch_load_u16 v66, off, s32 offset:292 +; GFX11-NEXT: scratch_load_u16 v67, off, s32 offset:284 +; GFX11-NEXT: scratch_load_u16 v68, off, s32 offset:276 +; GFX11-NEXT: scratch_load_u16 v69, off, s32 offset:268 +; GFX11-NEXT: scratch_load_u16 v70, off, s32 offset:260 +; GFX11-NEXT: scratch_load_u16 v71, off, s32 offset:252 +; GFX11-NEXT: scratch_load_u16 v80, off, s32 offset:244 +; GFX11-NEXT: scratch_load_u16 v81, off, s32 offset:236 +; GFX11-NEXT: scratch_load_u16 v82, off, s32 offset:228 +; GFX11-NEXT: scratch_load_u16 v83, off, s32 offset:220 +; GFX11-NEXT: scratch_load_u16 v84, off, s32 offset:212 +; GFX11-NEXT: scratch_load_u16 v85, off, s32 offset:204 +; GFX11-NEXT: scratch_load_u16 v86, off, s32 offset:196 +; GFX11-NEXT: scratch_load_u16 v100, off, s32 offset:188 +; GFX11-NEXT: scratch_load_u16 v101, off, s32 offset:180 +; GFX11-NEXT: scratch_load_u16 v102, off, s32 offset:172 +; GFX11-NEXT: scratch_load_u16 v103, off, s32 offset:164 +; GFX11-NEXT: scratch_load_u16 v112, off, s32 offset:156 +; 
GFX11-NEXT: scratch_load_u16 v118, off, s32 offset:148 +; GFX11-NEXT: scratch_load_u16 v145, off, s32 offset:140 +; GFX11-NEXT: scratch_load_u16 v146, off, s32 offset:132 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v147, off, s32 offset:124 +; GFX11-NEXT: scratch_load_u16 v148, off, s32 offset:116 +; GFX11-NEXT: scratch_load_u16 v162, off, s32 offset:108 +; GFX11-NEXT: scratch_load_u16 v163, off, s32 offset:100 +; GFX11-NEXT: scratch_load_u16 v164, off, s32 offset:92 +; GFX11-NEXT: scratch_load_u16 v165, off, s32 offset:84 +; GFX11-NEXT: scratch_load_u16 v166, off, s32 offset:76 +; GFX11-NEXT: scratch_load_u16 v180, off, s32 offset:68 +; GFX11-NEXT: scratch_load_u16 v181, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v182, off, s32 offset:52 +; GFX11-NEXT: scratch_load_u16 v183, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v45, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v46, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v47, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v56, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v62, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v89, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v90, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v91, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v92, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v93, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v76, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v77, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v78, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v79, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v88, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v63, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v72, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v73, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v74, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v75, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(62) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v57, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v58, 8, v4 +; GFX11-NEXT: 
v_lshlrev_b32_e32 v59, 8, v6 +; GFX11-NEXT: v_lshlrev_b32_e32 v60, 8, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v61, 8, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v40, 8, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v41, 8, v14 +; GFX11-NEXT: v_lshlrev_b32_e32 v42, 8, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v43, 8, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v44, 8, v20 +; GFX11-NEXT: v_lshlrev_b32_e32 v167, 8, v22 +; GFX11-NEXT: v_lshlrev_b32_e32 v176, 8, v24 +; GFX11-NEXT: v_lshlrev_b32_e32 v177, 8, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v178, 8, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v179, 8, v30 +; GFX11-NEXT: v_lshlrev_b32_e32 v149, 8, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v150, 8, v87 +; GFX11-NEXT: s_waitcnt vmcnt(61) +; GFX11-NEXT: v_lshlrev_b32_e32 v151, 8, v96 +; GFX11-NEXT: s_waitcnt vmcnt(60) +; GFX11-NEXT: v_lshlrev_b32_e32 v160, 8, v97 +; GFX11-NEXT: s_waitcnt vmcnt(59) +; GFX11-NEXT: v_lshlrev_b32_e32 v161, 8, v98 +; GFX11-NEXT: s_waitcnt vmcnt(58) +; GFX11-NEXT: v_lshlrev_b32_e32 v132, 8, v99 +; GFX11-NEXT: s_waitcnt vmcnt(57) +; GFX11-NEXT: v_lshlrev_b32_e32 v133, 8, v113 +; GFX11-NEXT: s_waitcnt vmcnt(56) +; GFX11-NEXT: v_lshlrev_b32_e32 v134, 8, v114 +; GFX11-NEXT: s_waitcnt vmcnt(55) +; GFX11-NEXT: v_lshlrev_b32_e32 v135, 8, v115 +; GFX11-NEXT: s_waitcnt vmcnt(54) +; GFX11-NEXT: v_lshlrev_b32_e32 v144, 8, v116 +; GFX11-NEXT: s_waitcnt vmcnt(53) +; GFX11-NEXT: v_lshlrev_b32_e32 v119, 8, v117 +; GFX11-NEXT: s_waitcnt vmcnt(52) +; GFX11-NEXT: v_lshlrev_b32_e32 v128, 8, v128 +; GFX11-NEXT: s_waitcnt vmcnt(51) +; GFX11-NEXT: v_lshlrev_b32_e32 v129, 8, v129 +; GFX11-NEXT: s_waitcnt vmcnt(50) +; GFX11-NEXT: v_lshlrev_b32_e32 v130, 8, v130 +; GFX11-NEXT: s_waitcnt vmcnt(49) +; GFX11-NEXT: v_lshlrev_b32_e32 v131, 8, v131 +; GFX11-NEXT: s_waitcnt vmcnt(48) +; GFX11-NEXT: v_lshlrev_b32_e32 v113, 8, v94 +; GFX11-NEXT: s_waitcnt vmcnt(47) +; GFX11-NEXT: v_lshlrev_b32_e32 v114, 8, v95 +; GFX11-NEXT: s_waitcnt vmcnt(46) +; GFX11-NEXT: v_lshlrev_b32_e32 v115, 8, v104 +; GFX11-NEXT: s_waitcnt 
vmcnt(45) +; GFX11-NEXT: v_lshlrev_b32_e32 v116, 8, v105 +; GFX11-NEXT: s_waitcnt vmcnt(44) +; GFX11-NEXT: v_lshlrev_b32_e32 v117, 8, v106 +; GFX11-NEXT: s_waitcnt vmcnt(43) +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 8, v107 +; GFX11-NEXT: s_waitcnt vmcnt(42) +; GFX11-NEXT: v_lshlrev_b32_e32 v96, 8, v108 +; GFX11-NEXT: s_waitcnt vmcnt(41) +; GFX11-NEXT: v_lshlrev_b32_e32 v97, 8, v109 +; GFX11-NEXT: s_waitcnt vmcnt(40) +; GFX11-NEXT: v_lshlrev_b32_e32 v98, 8, v110 +; GFX11-NEXT: s_waitcnt vmcnt(39) +; GFX11-NEXT: v_lshlrev_b32_e32 v99, 8, v111 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB15_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v54 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v53 +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: v_or_b32_e32 v0, v0, v90 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v91 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_and_b32 s11, s26, 0xff +; GFX11-NEXT: v_or_b32_e32 v5, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v50 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v49 +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v76 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v77 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v7, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v48 +; GFX11-NEXT: v_and_b32_e32 
v1, 0xff, v39 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v78 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v79 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v8, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v38 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v37 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v88 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v63 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v9, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v72 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v73 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v10, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v33 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v74 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v75 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 
0xff, v32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v62 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v57 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v58 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v56 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v47 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v59 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v60 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v13, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v46 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v45 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v61 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v40 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v14, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v183 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v182 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v41 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v42 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | 
instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v15, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v181 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v180 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v43 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v44 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v16, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v166 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v165 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v167 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v176 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v17, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v164 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v163 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v177 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v178 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v18, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v162 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v148 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v179 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v149 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v19, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v147 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v146 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v150 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v151 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v20, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v145 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v118 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v160 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v161 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v21, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v112 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v103 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v132 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v133 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v22, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v102 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v101 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v134 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v135 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 
0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v23, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v100 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v86 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v144 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v119 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v24, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v85 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v84 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v128 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v129 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v25, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v83 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v82 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v130 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v131 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v26, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v81 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v80 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v113 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v114 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v27, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v71 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v70 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v115 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v116 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v28, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v69 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v68 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v117 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v87 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v29, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v67 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v66 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v96 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v97 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v30, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v65 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v64 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v98 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v99 +; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_or_b32_e32 v31, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v55 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v89 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_lshl_b32 s6, s6, 16 +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_and_b32 s9, s9, 0xffff +; GFX11-NEXT: s_lshl_b32 s10, s10, 16 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v51 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v52 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_or_b32_e32 v3, v3, v93 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v92 +; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_or_b32_e32 v6, v2, v3 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB15_3 +; GFX11-NEXT: .LBB15_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s0, s0, s1 +; GFX11-NEXT: s_and_b32 s1, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s2, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_or_b32 s1, s2, s1 +; GFX11-NEXT: s_and_b32 s2, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s19, 8 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_or_b32 s2, s3, s2 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_and_b32 s3, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s1, s1, s2 +; GFX11-NEXT: s_or_b32 s2, s4, s3 +; GFX11-NEXT: s_and_b32 s3, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s23, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s3, s4, s3 +; GFX11-NEXT: s_and_b32 s4, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s25, 8 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_addk_i32 s3, 0x300 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s2, s2, 0xffff +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: 
s_addk_i32 s4, 0x300 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s2, s2, s3 +; GFX11-NEXT: s_and_b32 s3, s4, 0xffff +; GFX11-NEXT: s_and_b32 s4, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s27, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v55 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v54 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v52 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v51 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v38 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v33 +; GFX11-NEXT: v_or_b32_e32 v0, v89, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v90, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v91, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v92, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v93, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v7, v88, v7 +; GFX11-NEXT: v_or_b32_e32 v11, v74, v11 +; GFX11-NEXT: v_or_b32_e32 v12, v75, v12 +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 
0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_or_b32_e32 v6, v3, v6 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v49 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v48 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v39 +; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v37 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v35 +; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v12 +; GFX11-NEXT: s_waitcnt vmcnt(3) +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v46 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v181 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v180 +; GFX11-NEXT: v_or_b32_e32 v0, v76, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v77, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v78, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v79, v3 +; GFX11-NEXT: v_or_b32_e32 v7, v63, v7 +; GFX11-NEXT: v_or_b32_e32 v8, v72, v8 +; GFX11-NEXT: v_or_b32_e32 v10, v73, v10 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 +; GFX11-NEXT: 
v_or_b32_e32 v12, v61, v12 +; GFX11-NEXT: v_or_b32_e32 v16, v43, v16 +; GFX11-NEXT: v_or_b32_e32 v17, v44, v17 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v7 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_or_b32_e32 v7, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v8, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v9, v9, v13 +; GFX11-NEXT: v_or_b32_e32 v10, v14, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v62 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v56 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v47 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v45 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 3, v183 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v182 +; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-NEXT: v_or_b32_e32 v16, v16, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v162 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 3, v145 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v118 +; GFX11-NEXT: v_or_b32_e32 v0, v57, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v58, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v59, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v60, v3 +; GFX11-NEXT: v_or_b32_e32 v12, v40, v12 +; GFX11-NEXT: v_or_b32_e32 v13, v41, v13 +; GFX11-NEXT: v_or_b32_e32 v15, v42, v15 +; GFX11-NEXT: v_and_b32_e32 
v17, 0xff, v17 +; GFX11-NEXT: v_and_b32_e32 v21, 0xff, v21 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 +; GFX11-NEXT: v_or_b32_e32 v17, v179, v17 +; GFX11-NEXT: v_or_b32_e32 v21, v160, v21 +; GFX11-NEXT: v_or_b32_e32 v22, v161, v22 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v18, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v15, 16, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_or_b32_e32 v12, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v13, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v18 +; GFX11-NEXT: v_or_b32_e32 v15, v19, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v166 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v165 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v164 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v163 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v148 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 3, v147 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v146 +; GFX11-NEXT: v_and_b32_e32 v21, 0xffff, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18 +; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20 +; GFX11-NEXT: v_or_b32_e32 v21, v21, v22 +; GFX11-NEXT: 
v_add_nc_u32_e32 v22, 3, v100 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 3, v83 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v82 +; GFX11-NEXT: v_or_b32_e32 v0, v167, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v176, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v177, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v178, v3 +; GFX11-NEXT: v_or_b32_e32 v17, v149, v17 +; GFX11-NEXT: v_or_b32_e32 v18, v150, v18 +; GFX11-NEXT: v_or_b32_e32 v20, v151, v20 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 +; GFX11-NEXT: v_or_b32_e32 v22, v144, v22 +; GFX11-NEXT: v_or_b32_e32 v26, v130, v26 +; GFX11-NEXT: v_or_b32_e32 v27, v131, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 16, v17 +; GFX11-NEXT: v_and_b32_e32 v24, 0xffff, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_or_b32_e32 v17, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v18, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v19, v19, v23 +; GFX11-NEXT: v_or_b32_e32 v20, v24, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v112 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v103 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v102 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v101 +; GFX11-NEXT: v_and_b32_e32 v24, 0xffff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v86 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 3, v85 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 3, v84 +; GFX11-NEXT: 
v_and_b32_e32 v26, 0xffff, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 16, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v23, 0xff, v23 +; GFX11-NEXT: v_and_b32_e32 v25, 0xff, v25 +; GFX11-NEXT: v_or_b32_e32 v26, v26, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v69 +; GFX11-NEXT: v_or_b32_e32 v0, v132, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v133, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v134, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v135, v3 +; GFX11-NEXT: v_or_b32_e32 v22, v119, v22 +; GFX11-NEXT: v_or_b32_e32 v23, v128, v23 +; GFX11-NEXT: v_or_b32_e32 v25, v129, v25 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 +; GFX11-NEXT: v_or_b32_e32 v27, v117, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v28, 16, v22 +; GFX11-NEXT: v_and_b32_e32 v29, 0xffff, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_or_b32_e32 v22, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v23, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v24, v24, v28 +; GFX11-NEXT: v_or_b32_e32 v25, v29, v25 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v81 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v80 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v71 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v70 +; GFX11-NEXT: v_and_b32_e32 v29, 0xffff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v68 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 3, 
v67 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 3, v66 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 3, v65 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 3, v64 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28 +; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30 +; GFX11-NEXT: v_and_b32_e32 v31, 0xff, v31 +; GFX11-NEXT: v_and_b32_e32 v32, 0xff, v32 +; GFX11-NEXT: v_or_b32_e32 v0, v113, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v114, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v115, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v116, v3 +; GFX11-NEXT: v_or_b32_e32 v27, v87, v27 +; GFX11-NEXT: v_or_b32_e32 v28, v96, v28 +; GFX11-NEXT: v_or_b32_e32 v30, v97, v30 +; GFX11-NEXT: v_or_b32_e32 v31, v98, v31 +; GFX11-NEXT: v_or_b32_e32 v32, v99, v32 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v33, 16, v27 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v32, 16, v32 +; GFX11-NEXT: v_or_b32_e32 v27, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v28, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v29, v29, v33 +; GFX11-NEXT: v_or_b32_e32 v30, v34, v30 +; GFX11-NEXT: v_or_b32_e32 v31, v31, v32 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; 
GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB15_3: ; %end +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:320 +; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:324 +; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:328 +; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:332 +; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:336 +; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:340 +; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:344 +; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:348 +; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:352 +; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:356 +; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:360 +; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:364 +; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:368 +; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:372 +; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:376 +; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:380 +; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:384 +; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:388 +; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:392 +; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:396 +; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:400 +; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:404 +; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:408 +; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:412 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:416 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:420 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:424 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:428 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:432 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:436 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:440 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:444 +; GFX11-NEXT: s_clause 0x7 +; GFX11-NEXT: 
scratch_load_b32 v47, off, s32 offset:448 +; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:452 +; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:456 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:460 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:464 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:468 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:472 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:476 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB15_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-NEXT: s_branch .LBB15_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -52092,876 +51229,913 @@ define <32 x float> @bitcast_v128i8_to_v32f32(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v51, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v52, off, s32 
offset:336 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v53, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v54, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v54, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v55, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 offset:260 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:536 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:532 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:528 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:524 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:500 +; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:412 +; GFX11-TRUE16-NEXT: s_clause 0x4 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:392 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:252 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v68, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v49, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v114, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v81, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v83, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v83, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v85, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v87, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v87, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v97, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v98, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v100, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v102, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v160, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v160, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v161, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v161, off, s32 offset:144 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v162, off, s32 offset:152 +; GFX11-TRUE16-NEXT: scratch_load_u16 v31, off, s32 offset:384 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:380 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 offset:376 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:372 +; GFX11-TRUE16-NEXT: scratch_load_u16 v56, off, s32 offset:368 +; GFX11-TRUE16-NEXT: scratch_load_u16 v114, off, s32 offset:364 +; GFX11-TRUE16-NEXT: scratch_load_u16 v33, off, s32 offset:360 +; GFX11-TRUE16-NEXT: scratch_load_u16 v115, off, s32 offset:356 +; GFX11-TRUE16-NEXT: scratch_load_u16 v57, off, s32 offset:352 +; GFX11-TRUE16-NEXT: scratch_load_u16 v116, off, s32 offset:348 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:344 +; GFX11-TRUE16-NEXT: scratch_load_u16 v117, off, s32 offset:340 +; GFX11-TRUE16-NEXT: scratch_load_u16 v58, off, s32 offset:336 +; GFX11-TRUE16-NEXT: scratch_load_u16 v118, off, s32 offset:332 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:328 +; GFX11-TRUE16-NEXT: scratch_load_u16 v119, off, s32 offset:324 +; GFX11-TRUE16-NEXT: scratch_load_u16 v59, off, s32 offset:320 +; GFX11-TRUE16-NEXT: scratch_load_u16 v128, off, s32 offset:316 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:312 +; GFX11-TRUE16-NEXT: scratch_load_u16 v129, off, s32 offset:308 +; GFX11-TRUE16-NEXT: scratch_load_u16 v60, off, s32 offset:304 +; GFX11-TRUE16-NEXT: scratch_load_u16 v130, off, s32 offset:300 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:296 +; GFX11-TRUE16-NEXT: scratch_load_u16 v131, off, s32 offset:292 +; GFX11-TRUE16-NEXT: scratch_load_u16 v61, off, s32 offset:288 +; GFX11-TRUE16-NEXT: scratch_load_u16 v132, off, s32 offset:284 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:280 +; GFX11-TRUE16-NEXT: scratch_load_u16 v133, off, s32 offset:276 +; GFX11-TRUE16-NEXT: scratch_load_u16 v62, off, s32 offset:272 +; GFX11-TRUE16-NEXT: scratch_load_u16 v134, off, s32 offset:268 +; 
GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:264 +; GFX11-TRUE16-NEXT: scratch_load_u16 v135, off, s32 offset:260 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v162, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v163, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v164, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v165, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v55, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v65, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v80, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v81, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v82, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v84, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v86, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v96, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v99, off, s32 offset:68 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v100, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v101, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v103, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v112, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v113, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v63, off, s32 offset:256 +; GFX11-TRUE16-NEXT: scratch_load_u16 v144, off, s32 offset:252 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:248 +; GFX11-TRUE16-NEXT: scratch_load_u16 v145, off, s32 offset:244 +; GFX11-TRUE16-NEXT: scratch_load_u16 v72, off, s32 offset:240 +; GFX11-TRUE16-NEXT: scratch_load_u16 v146, off, s32 offset:236 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:232 +; GFX11-TRUE16-NEXT: scratch_load_u16 v147, off, s32 offset:228 +; GFX11-TRUE16-NEXT: scratch_load_u16 v73, off, s32 offset:224 +; GFX11-TRUE16-NEXT: scratch_load_u16 v148, off, s32 offset:220 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:216 +; GFX11-TRUE16-NEXT: scratch_load_b32 v51, off, s32 offset:388 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v74, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v75, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v76, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v77, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_u16 v78, off, s32 offset:96 +; GFX11-TRUE16-NEXT: 
scratch_load_u16 v79, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v88, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v89, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v90, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v91, off, s32 offset:136 +; GFX11-TRUE16-NEXT: scratch_load_u16 v92, off, s32 offset:144 +; GFX11-TRUE16-NEXT: scratch_load_u16 v93, off, s32 offset:152 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_u16 v94, off, s32 offset:160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v95, off, s32 offset:168 +; GFX11-TRUE16-NEXT: scratch_load_u16 v104, off, s32 offset:176 +; GFX11-TRUE16-NEXT: scratch_load_u16 v105, off, s32 offset:184 +; GFX11-TRUE16-NEXT: scratch_load_u16 v106, off, s32 offset:192 +; GFX11-TRUE16-NEXT: scratch_load_u16 v107, off, s32 offset:200 +; GFX11-TRUE16-NEXT: scratch_load_u16 v108, off, s32 offset:208 +; GFX11-TRUE16-NEXT: scratch_load_u16 v149, off, s32 offset:212 +; GFX11-TRUE16-NEXT: scratch_load_u16 v150, off, s32 offset:204 +; GFX11-TRUE16-NEXT: scratch_load_u16 v151, off, s32 offset:196 +; GFX11-TRUE16-NEXT: scratch_load_u16 v160, off, s32 offset:188 +; GFX11-TRUE16-NEXT: scratch_load_u16 v161, off, s32 offset:180 +; GFX11-TRUE16-NEXT: scratch_load_u16 v162, off, s32 offset:172 +; GFX11-TRUE16-NEXT: scratch_load_u16 v163, off, s32 offset:164 +; GFX11-TRUE16-NEXT: scratch_load_u16 v164, off, s32 offset:156 +; GFX11-TRUE16-NEXT: scratch_load_u16 v165, off, s32 offset:148 +; GFX11-TRUE16-NEXT: scratch_load_u16 v166, off, s32 offset:140 +; GFX11-TRUE16-NEXT: scratch_load_u16 v167, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v176, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v177, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v178, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v179, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v180, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v181, 
off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v182, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v183, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v40, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v41, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v42, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v43, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v44, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v45, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v115, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v117.l, v30.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v118.h, v28.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v119.l, v26.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v119.h, v24.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v130.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v130.h, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v131.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v132.h, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.l, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.h, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v144.l, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v145.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v146.h, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v146.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.l, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.l, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.h, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.h, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.l, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.h, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.l, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 
v145.l, 8, v19.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.l, 8, v21.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.h, 8, v23.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.l, 8, v25.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.h, 8, v27.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.h, 8, v29.l +; GFX11-TRUE16-NEXT: scratch_load_u16 v46, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v47, off, s32 offset:4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.l, v30.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v70.h, v28.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v71.l, v26.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v71.h, v24.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v82.l, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v82.h, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v83.l, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v84.h, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v86.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v86.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v96.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v97.h, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v98.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v98.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.l, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v19.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v21.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v23.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v25.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v27.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v29.l +; GFX11-TRUE16-NEXT: 
s_waitcnt vmcnt(62) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v31.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v32.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v56.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v33.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v57.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v34.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v58.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v35.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.h, 8, v59.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.l, 8, v36.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.h, 8, v60.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v37.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v61.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v38.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v62.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, v63.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v48.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.h, 8, v72.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v49.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v73.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v50.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v114 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v51 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.l, 8, v81.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.l, 8, v52.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.l, 8, v82.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v53.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.h, 8, v83.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v54.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.l, 8, v83.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.l, 8, v55.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.h, 8, v85.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.h, 8, v64.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.h, 8, v85.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.h, 8, v65.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.l, 8, v87.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v66.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.l, 8, v87.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v67.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.h, 8, v97.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.h, 8, v74.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.l, 8, v97.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.l, 8, v75.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.h, 8, v98.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v76.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.h, 8, v99.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v77.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.l, 8, v100.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v78.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v101.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.l, 8, v79.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v102.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.h, 8, v88.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(38) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v160.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, 
v89.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.l, 8, v160.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v90.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(36) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v161.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v91.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v161.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v92.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(34) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v162.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v93.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v162.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v94.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v163.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v95.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v163.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v104.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(30) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v164.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v105.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v164.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v106.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(28) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v165.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v107.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v165.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v71.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.l, 8, v71.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.l, 8, v70.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.h, 8, v70.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v68.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.h, 8, v67.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v66.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v66.l -; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v64.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v64.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v55.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v54.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v54.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v53.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v53.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v52.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v52.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v51.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v51.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v50.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v31.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v108.l ; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB38_3 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow -; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB38_4 -; GFX11-TRUE16-NEXT: .LBB38_2: ; %end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB38_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v149.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.h, 0 -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v146.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v146.l -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v0.l, v151.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v151.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v0.l, v149.h -; 
GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v150.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v3.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v145.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v144.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v149, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v1.h, v150.l -; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v132.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v131.l -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v148.h -; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v130.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v149, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v2.l, v148.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v2.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v134.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v134.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v145.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v130.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v149, v2 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v3.l, v147.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v147.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v3.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v135.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v119.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v119.l -; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v118.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v149, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v4.l, v144.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v4.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v133.h -; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v117.l +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB38_2 +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v101.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.h, 0 +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v98.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v98.l +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v0.l, v103.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v103.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v101.h +; 
GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v102.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v97.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v96.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v101, v0 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v1.h, v102.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v84.h +; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v83.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v100.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v82.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v101, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v2.l, v100.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v86.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v86.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v97.l +; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v82.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v101, v2 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v3.l, v99.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v99.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v87.h +; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v71.h +; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v70.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v101, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v4.l, v96.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v85.h +; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v69.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v115.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v115.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v149, v4 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v5.l, v135.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v5.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v132.l -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v129.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v113.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v112.l -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v149, v5 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v6.l, v133.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v6.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v103.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v128.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v103.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v101.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v149, v6 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v7.l, v131.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v7.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v118.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v100.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v99.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v98.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v149, v7 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v8.l, v129.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v8.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v116.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v114.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v96.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v96.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v149, v8 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v9.l, v128.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v9.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v86.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v113.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v86.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v84.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v149, v9 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v10.l, v117.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v10.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v102.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v84.l -; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v82.h -; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v81.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v149, v10 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v11.l, v116.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v11.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v101.l -; GFX11-TRUE16-NEXT: 
v_or_b16 v16.h, v16.h, v99.l -; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v80.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v80.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v149, v11 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v12.l, v114.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v12.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v69.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v97.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v69.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v68.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v149, v12 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v13.l, v112.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v13.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v87.h -; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v67.l -; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v65.h -; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v65.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v149, v13 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v14.l, v102.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v14.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v85.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v83.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v50.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v149, v14 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v15.l, v100.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v15.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v82.l -; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v149, v15 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v16.l, v98.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v16.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v71.h -; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v48.l -; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v39.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v149, v16 -; GFX11-TRUE16-NEXT: v_or_b16 
v149.l, v17.l, v97.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v17.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v70.h -; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v68.l -; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v18.l, v87.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v18.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v66.h -; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v149, v18 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v19.l, v85.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v19.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v149, v19 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v20.l, v83.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v20.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v55.l -; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v54.l -; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v149, v20 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v21.l, v81.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v21.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v53.l -; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v149, v21 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v22.l, v71.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v22.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v52.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr146_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v47.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v46.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v101, v4 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v5.l, v87.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v84.l +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v81.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v45.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v44.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v101, v5 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v6.l, v85.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v43.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v42.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v41.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v101, 
v6 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v7.l, v83.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v70.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v40.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v183.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v182.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v101, v7 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v8.l, v81.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v68.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v67.h +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v181.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v180.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v101, v8 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v9.l, v80.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v179.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v66.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v178.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v177.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v101, v9 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v10.l, v69.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v65.h +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v176.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v167.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v166.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v101, v10 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v11.l, v68.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v64.h +; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v55.h +; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v165.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v164.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v101, v11 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v12.l, v67.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v163.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v54.h +; 
GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v162.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v161.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v101, v12 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v13.l, v66.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v53.h +; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v160.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v151.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v150.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v101, v13 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v14.l, v65.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v52.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v51.h +; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v149.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v148.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v101, v14 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v15.l, v64.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v147.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v50.h +; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v146.l +; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v145.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v101, v15 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v16.l, v55.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v49.h +; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v144.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v135.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v134.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v101, v16 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v17.l, v54.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v48.h +; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v133.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v132.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v101, v17 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v18.l, v53.l +; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v131.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v38.h +; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v130.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v129.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v101, v18 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v19.l, v52.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v37.h +; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v128.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v119.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v118.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v101, v19 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v20.l, v51.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v35.h +; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v117.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v116.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v101, v20 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v21.l, v50.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v115.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v34.h +; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v114.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v113.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v101, v21 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v22.l, v49.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v112.l ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_hi16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_lo16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr132 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr160 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr161 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr162 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr163 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr164 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr165 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr167 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr176 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr177 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr178 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr179 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr180 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr181 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr182 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr183 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr40 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr41 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr42 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr43 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr44 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr46 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr47 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr101_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr53_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v149, v22 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v23.l, v70.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v23.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v51.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v101, v22 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v23.l, v48.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v32.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v149, v23 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v24.l, v67.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v24.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v149, v24 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v25.l, v66.l -; GFX11-TRUE16-NEXT: 
v_mov_b16_e64 v25.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v101, v23 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v24.l, v39.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v101, v24 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v25.l, v38.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v149, v25 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v26.l, v64.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v26.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v149, v26 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v27.l, v54.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v27.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v101, v25 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v26.l, v37.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v101, v26 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v27.l, v36.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v149, v27 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v28.l, v53.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v28.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v149, v28 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v29.l, v52.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v29.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v101, v27 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v28.l, v35.l +; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v101, v28 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v29.l, v34.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v149, v29 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v30.l, v51.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v149, v30 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v31.l, v50.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v31.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v101, v29 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v30.l, v33.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v101, v30 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v31.l, v32.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v149, v31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v101, v31 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 +; GFX11-TRUE16-NEXT: .LBB38_2: ; %Flow ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB38_2 -; GFX11-TRUE16-NEXT: .LBB38_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v149.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v149.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v146.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v146.l, 3 +; GFX11-TRUE16-NEXT: s_cbranch_execz 
.LBB38_4 +; GFX11-TRUE16-NEXT: ; %bb.3: ; %cmp.true +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v101.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v101.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v98.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v98.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v151.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v151.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v150.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v150.l, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v145.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v103.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v103.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v102.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v102.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v97.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v144.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v96.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v134.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v86.h, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v31, v3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v134.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v86.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v148.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v100.l, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v148.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v100.h, v1.h ; 
GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v31, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v31.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v3.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v147.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v147.h, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v132.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v131.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v99.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v99.h, v2.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v84.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v83.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v31, v5 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v4.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v130.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v130.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v82.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v82.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v144.h, v3.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v145.l, v3.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v96.h, v3.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v97.l, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v4.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v31, v6 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v135.l, v4.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v135.h, v4.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v119.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v119.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v87.l, v4.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v87.h, v4.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v71.h, 3 +; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v71.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v31, v7 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v6.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v118.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v70.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v69.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v133.l, v5.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v133.h, v5.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v85.l, v5.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v85.h, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v6.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v31, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v131.h, v6.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v132.l, v6.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v83.h, v6.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v84.l, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v115.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v47.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v46.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v31, v9 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v113.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v45.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v44.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v31.h -; GFX11-TRUE16-NEXT: 
v_or_b16 v9.l, v129.l, v7.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v129.h, v7.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v81.l, v7.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v81.h, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v8.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v31, v10 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v128.l, v8.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v128.h, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v103.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v103.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v80.l, v8.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v80.h, v8.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v43.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v42.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v31, v11 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v101.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v100.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v41.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v40.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v117.h, v9.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v118.l, v9.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v69.h, v9.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v70.l, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v10.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v31, v12 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v116.l, v10.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v116.h, v10.h -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v99.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v98.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v68.l, v10.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v68.h, v10.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v183.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v182.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v31, v13 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v96.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v96.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v181.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v180.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v114.l, v11.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v114.h, v11.h +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v67.l, v11.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v67.h, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v12.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v31, v14 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v112.h, v12.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v113.l, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v86.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v86.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v66.l, v12.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v66.h, v12.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v179.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v178.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v31, v15 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v14.h -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v84.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v84.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v177.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v176.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v102.l, v13.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v102.h, v13.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v65.l, v13.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v65.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v14.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v31, v16 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v15.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v100.h, v14.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v101.l, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v82.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v81.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v64.l, v14.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v64.h, v14.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v167.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v166.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v31, v17 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v15.h ; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v16.l ; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v16.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v80.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v80.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v165.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v164.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v98.h, v15.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v99.l, v15.h +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v55.l, v15.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v55.h, v15.h ; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v16.l ; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v16.h ; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v31, v18 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v17.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.h, 0x300, v17.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v97.l, v16.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v97.h, v16.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v69.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v69.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v54.l, v16.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v54.h, v16.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v163.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v162.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v31, v19 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v17.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, 0x300, v17.h ; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v18.l ; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v18.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v68.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v161.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v160.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v87.l, v17.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v87.h, v17.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v53.l, v17.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v53.h, v17.h ; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v18.l ; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v18.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v31, v20 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v19.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.h, 0x300, v19.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v85.l, v18.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v85.h, v18.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v65.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v65.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v52.l, v18.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v52.h, v18.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v151.l, 3 +; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v150.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v31, v21 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v19.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, 0x300, v19.h ; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v20.l ; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v20.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v50.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v149.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v148.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v83.l, v19.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v83.h, v19.h +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v51.l, v19.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v51.h, v19.h ; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v20.l ; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v20.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v31, v22 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v21.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.h, 0x300, v21.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v81.h, v20.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v82.l, v20.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v50.l, v20.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v50.h, v20.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v147.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v146.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v31, v23 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v21.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, 0x300, v21.h ; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v22.l ; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v22.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v145.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v144.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v31.h -; 
GFX11-TRUE16-NEXT: v_or_b16 v23.l, v71.l, v21.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v71.h, v21.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v49.l, v21.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v49.h, v21.h ; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v22.l ; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v22.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v31, v24 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v23.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.h, 0x300, v23.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v70.l, v22.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v70.h, v22.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v39.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v39.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v48.l, v22.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v48.h, v22.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v135.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v134.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v31, v25 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v23.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, 0x300, v23.h ; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v24.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v133.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v132.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v67.h, v23.l -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v68.l, v23.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v39.l, v23.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v39.h, v23.h ; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v24.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v31, v26 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v25.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.h, 0x300, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v66.l, v24.l -; 
GFX11-TRUE16-NEXT: v_or_b16 v25.h, v66.h, v24.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v37.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v37.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v38.l, v24.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v38.h, v24.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v131.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v130.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v31, v27 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v25.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, 0x300, v25.h ; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v26.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v36.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v129.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v128.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v64.l, v25.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v64.h, v25.h +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v37.l, v25.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v37.h, v25.h ; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v26.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v31, v28 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v27.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.h, 0x300, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v54.h, v26.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v55.l, v26.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v35.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v36.l, v26.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v36.h, v26.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v119.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v118.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v31, v29 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v27.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, 0x300, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v28.l ; 
GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v28.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v34.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v116.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v53.h, v27.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v54.l, v27.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v35.l, v27.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v35.h, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v28.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v31, v30 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v29.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v34.h, 0x300, v29.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v52.h, v28.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v53.l, v28.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v33.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v31, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v35.h, 0x300, v29.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v34.l, v28.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v34.h, v28.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v114.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v31, v35 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v29.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v29.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v34.h, 0x300, v29.h ; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v30.l ; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v30.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v51.h, v29.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v52.l, v29.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v113.l, 3 +; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v33.l, v29.l +; GFX11-TRUE16-NEXT: v_or_b16 v33.h, v33.h, v29.h ; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v30.l ; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v30.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v31, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v32.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v32.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v50.h, v30.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v51.l, v30.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v31, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v33.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v33.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v32.l, v30.l +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v32.h, v30.h ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v31, v33 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v32.l @@ -52969,7 +52143,48 @@ define <32 x float> @bitcast_v128i8_to_v32f32(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_add_nc_u16 v32.h, 0x300, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v31.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v32 +; GFX11-TRUE16-NEXT: .LBB38_4: ; %end ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:392 +; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_load_b32 v92, 
off, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:516 +; GFX11-TRUE16-NEXT: s_clause 0x4 +; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:524 +; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:528 +; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:532 +; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:536 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v32f32: @@ -57235,1887 
+56450,946 @@ define inreg <32 x float> @bitcast_v128i8_to_v32f32_scalar(<128 x i8> inreg %a, ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: bitcast_v128i8_to_v32f32_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 -; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v89, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 -; GFX11-TRUE16-NEXT: s_clause 0x7 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v2, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:32 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v16, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v18, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v20, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v22, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v24, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v26, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v28, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v30, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v87, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v113, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v116, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v117, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v128, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v130, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v94, off, s32 offset:240 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v95, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v104, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v105, off, s32 
offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v106, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v107, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v108, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v109, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v110, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v111, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v81, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v83, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v102, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v112, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v146, off, s32 offset:132 -; 
GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v147, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v148, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v166, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v180, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v181, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v182, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v183, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v45, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v46, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v47, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v56, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v62, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v89, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v90, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v91, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v92, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v93, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v76, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v77, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v78, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v79, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v88, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v63, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v72, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v73, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v74, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v75, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: 
v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v57, 8, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v58, 8, v4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v59, 8, v6 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v60, 8, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v61, 8, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v40, 8, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v41, 8, v14 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v42, 8, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v43, 8, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v44, 8, v20 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v167, 8, v22 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v176, 8, v24 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v177, 8, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v178, 8, v28 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v179, 8, v30 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v149, 8, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v150, 8, v87 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v151, 8, v96 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v160, 8, v97 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v161, 8, v98 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v132, 8, v99 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v133, 8, v113 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v134, 8, v114 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v135, 8, v115 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v144, 8, v116 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v119, 8, v117 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v128, 8, v128 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v129, 8, v129 -; GFX11-TRUE16-NEXT: 
s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v130, 8, v130 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v131, 8, v131 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v113, 8, v94 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v114, 8, v95 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v115, 8, v104 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v116, 8, v105 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v117, 8, v106 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 8, v107 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v96, 8, v108 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v97, 8, v109 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v98, 8, v110 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v99, 8, v111 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB39_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v54 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v53 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v90 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v91 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s11, 
s26, 0xff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v50 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v49 -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v76 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v77 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v39 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v88 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v63 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v0, v0, v72 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v73 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v33 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v74 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v75 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v62 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v57 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v58 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v56 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v47 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v60 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v46 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v45 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v61 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v41 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v42 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v43 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v166 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v1, 0xff, v165 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v167 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v176 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v164 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v163 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v162 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v148 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v179 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v149 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v147 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v150 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, 
v1, v151 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v160 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v161 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v112 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v103 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v132 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v133 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v102 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v101 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v134 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v135 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v144 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v119 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v85 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v84 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v128 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v129 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v130 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v81 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v80 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v113 -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v1, v1, v114 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v115 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v116 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v68 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v117 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v87 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v67 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v96 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v97 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, 
v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v65 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v98 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v99 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v55 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; 
GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s9, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s10, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v51 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v52 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v93 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v92 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v2, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB39_3 -; GFX11-TRUE16-NEXT: .LBB39_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s18, 
0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v55 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v54 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v52 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v51 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v38 -; 
GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v89, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v90, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v92, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v93, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v88, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v74, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v75, v12 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v3, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v49 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v48 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v39 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff, v7 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v7, 3, v37 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 3, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v46 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 3, v181 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v77, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v78, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v79, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v63, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v72, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v73, v10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v61, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v43, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v44, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; 
GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v9, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v14, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v32 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v62 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v56 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v45 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 3, v183 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 3, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v13 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v16, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v162 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 3, v145 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v118 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v57, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v58, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v59, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v60, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v40, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v41, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v42, 
v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xff, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v179, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v160, v21 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v161, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v18, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v18 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v19, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v166 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v165 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v164 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v163 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v148 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 3, v147 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v146 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xff, v20 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v21, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v100 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 3, v83 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v167, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v176, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v177, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v178, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v150, v18 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v151, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v144, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v130, v26 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v131, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v19, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v24, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v112 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v103 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v102 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v101 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v86 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 3, v85 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 3, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v23, 0xff, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v25, 0xff, v25 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v26, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v69 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v132, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v133, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v134, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v135, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v119, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v128, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v129, v25 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, 
v117, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xffff, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v24, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v29, v25 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v81 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v80 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v71 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v70 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xffff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v68 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 3, v67 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 3, v66 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 3, v65 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 3, v64 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v28 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xff, v31 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xff, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v113, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v114, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v115, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v116, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v87, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v96, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v97, v30 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v98, v31 -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v32, v99, v32 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v33, 16, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v28 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v32, 16, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v29, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v34, v30 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB39_3: ; %end -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_b32 
v95, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:444 -; GFX11-TRUE16-NEXT: s_clause 0x7 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:448 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:452 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:456 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:460 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:464 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:468 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:472 -; 
GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:476 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB39_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: s_branch .LBB39_2 -; -; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v32f32_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, 
v76, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 -; GFX11-FAKE16-NEXT: s_clause 0x7 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-FAKE16-NEXT: 
scratch_load_u16 v2, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v16, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v18, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_u16 v20, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_u16 v22, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_u16 v24, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_u16 v26, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_u16 v28, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_u16 v30, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_u16 v31, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_u16 v87, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_u16 v96, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_u16 v97, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_u16 v98, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_u16 v99, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_u16 v113, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_u16 v114, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_u16 v115, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_u16 v116, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_u16 v117, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_u16 v128, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_u16 v129, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_u16 v130, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_u16 v131, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_u16 v94, off, s32 offset:240 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; 
GFX11-FAKE16-NEXT: scratch_load_u16 v95, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_u16 v104, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_u16 v105, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_u16 v106, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_u16 v107, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_u16 v108, off, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_load_u16 v109, off, s32 offset:296 -; GFX11-FAKE16-NEXT: scratch_load_u16 v110, off, s32 offset:304 -; GFX11-FAKE16-NEXT: scratch_load_u16 v111, off, s32 offset:312 -; GFX11-FAKE16-NEXT: scratch_load_u16 v64, off, s32 offset:308 -; GFX11-FAKE16-NEXT: scratch_load_u16 v65, off, s32 offset:300 -; GFX11-FAKE16-NEXT: scratch_load_u16 v66, off, s32 offset:292 -; GFX11-FAKE16-NEXT: scratch_load_u16 v67, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_u16 v68, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_u16 v69, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_u16 v70, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_u16 v71, off, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_load_u16 v80, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_u16 v81, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_u16 v82, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_u16 v83, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_u16 v84, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_u16 v85, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_u16 v86, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_u16 v100, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_u16 v101, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_u16 v102, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_u16 v103, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_u16 v112, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_u16 v118, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_u16 v145, off, s32 offset:140 -; 
GFX11-FAKE16-NEXT: scratch_load_u16 v146, off, s32 offset:132 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v147, off, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_load_u16 v148, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_u16 v162, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_u16 v163, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_u16 v164, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_u16 v165, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_u16 v166, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_u16 v180, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_u16 v181, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v182, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_u16 v183, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v45, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v46, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v47, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v56, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v62, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v89, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v90, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v91, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v92, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v93, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v76, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v77, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v78, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v79, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v88, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v63, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v72, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v73, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v74, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v75, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-FAKE16-NEXT: 
v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v57, 8, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v58, 8, v4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v59, 8, v6 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v60, 8, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v61, 8, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v40, 8, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v41, 8, v14 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v42, 8, v16 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v43, 8, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v44, 8, v20 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v167, 8, v22 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v176, 8, v24 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v177, 8, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v178, 8, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v179, 8, v30 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v149, 8, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v150, 8, v87 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v151, 8, v96 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v160, 8, v97 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v161, 8, v98 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 8, v99 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v133, 8, v113 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 8, v114 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v135, 8, v115 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v144, 8, v116 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v119, 8, v117 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v128, 8, v128 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v129, 8, v129 -; GFX11-FAKE16-NEXT: 
s_waitcnt vmcnt(50) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v130, 8, v130 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v131, 8, v131 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v113, 8, v94 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v114, 8, v95 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v115, 8, v104 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v116, 8, v105 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v117, 8, v106 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 8, v107 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v96, 8, v108 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 8, v109 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 8, v110 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v99, 8, v111 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB39_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v54 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v53 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v90 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v91 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s11, 
s26, 0xff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v49 -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v76 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v77 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v88 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v63 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v0, v0, v72 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v73 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v33 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v74 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v75 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v62 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v57 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v58 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v56 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v47 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v60 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v46 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v45 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v61 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v41 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v42 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v43 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v166 -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v1, 0xff, v165 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v167 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v176 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v164 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v163 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v177 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v178 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v162 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v148 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v179 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v149 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v147 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v150 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, 
v1, v151 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v160 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v161 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v112 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v103 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v132 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v133 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v102 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v101 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v134 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v135 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; 
GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v144 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v119 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v24, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v85 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v84 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v128 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v129 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v130 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v81 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v80 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v113 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v1, v1, v114 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v115 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v116 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v68 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v117 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v87 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v67 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v96 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v97 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, 
v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v98 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v99 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v55 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; 
GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s9, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s10, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v51 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v52 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v93 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v92 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v2, v3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB39_3 -; GFX11-FAKE16-NEXT: .LBB39_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s18, 
0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v55 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v54 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v52 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v51 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v38 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v89, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v90, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v92, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v93, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v88, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v74, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v75, v12 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v3, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v49 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v48 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v39 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff, v7 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v7, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 3, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v46 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 3, v181 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v77, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v78, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v79, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v63, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v72, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v73, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v61, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v43, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v44, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; 
GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v9, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v14, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v32 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v62 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v56 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v45 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 3, v183 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 3, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v16, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v162 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 3, v145 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v118 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v57, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v58, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v59, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v60, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v40, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v41, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v42, 
v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xff, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v179, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v160, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v161, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v18, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v18 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v19, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v166 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v165 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v164 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v163 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v148 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 3, v147 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v146 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xff, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v21, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v100 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 3, v83 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v167, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v176, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v177, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v178, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v150, v18 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v151, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v144, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v130, v26 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v131, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, v19, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v24, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v112 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v103 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v102 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v101 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v86 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 3, v85 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 3, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xff, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xff, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v26, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v69 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v132, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v133, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v134, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v135, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v119, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v128, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v129, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, 
v117, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v24, v24, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v29, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v81 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v80 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v71 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v70 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v68 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 3, v67 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 3, v66 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 3, v65 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 3, v64 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xff, v28 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xff, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xff, v31 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v113, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v114, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v115, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v116, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v87, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v96, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v97, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v98, v31 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v32, v99, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v33, 16, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, v29, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v34, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v31, v32 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB39_3: ; %end -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v111, off, s32 offset:320 -; GFX11-FAKE16-NEXT: scratch_load_b32 v110, off, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_load_b32 v109, off, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_load_b32 v108, off, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_load_b32 v107, off, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_load_b32 v106, off, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_load_b32 v105, off, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_load_b32 v104, off, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_load_b32 
v95, off, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:444 -; GFX11-FAKE16-NEXT: s_clause 0x7 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:448 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:452 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:456 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:460 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:464 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:468 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:472 -; 
GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:476 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB39_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: s_branch .LBB39_2 +; GFX11-LABEL: bitcast_v128i8_to_v32f32_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-NEXT: scratch_store_b32 off, v79, 
s32 offset:384 +; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:352 +; GFX11-NEXT: s_clause 0x7 +; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:320 +; GFX11-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 +; GFX11-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 +; GFX11-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 +; GFX11-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 +; GFX11-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 +; GFX11-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 +; GFX11-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 +; GFX11-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:316 +; GFX11-NEXT: scratch_load_u16 v2, off, s32 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 
v16, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v18, off, s32 offset:64 +; GFX11-NEXT: scratch_load_u16 v20, off, s32 offset:72 +; GFX11-NEXT: scratch_load_u16 v22, off, s32 offset:80 +; GFX11-NEXT: scratch_load_u16 v24, off, s32 offset:88 +; GFX11-NEXT: scratch_load_u16 v26, off, s32 offset:96 +; GFX11-NEXT: scratch_load_u16 v28, off, s32 offset:104 +; GFX11-NEXT: scratch_load_u16 v30, off, s32 offset:112 +; GFX11-NEXT: scratch_load_u16 v31, off, s32 offset:120 +; GFX11-NEXT: scratch_load_u16 v87, off, s32 offset:128 +; GFX11-NEXT: scratch_load_u16 v96, off, s32 offset:136 +; GFX11-NEXT: scratch_load_u16 v97, off, s32 offset:144 +; GFX11-NEXT: scratch_load_u16 v98, off, s32 offset:152 +; GFX11-NEXT: scratch_load_u16 v99, off, s32 offset:160 +; GFX11-NEXT: scratch_load_u16 v113, off, s32 offset:168 +; GFX11-NEXT: scratch_load_u16 v114, off, s32 offset:176 +; GFX11-NEXT: scratch_load_u16 v115, off, s32 offset:184 +; GFX11-NEXT: scratch_load_u16 v116, off, s32 offset:192 +; GFX11-NEXT: scratch_load_u16 v117, off, s32 offset:200 +; GFX11-NEXT: scratch_load_u16 v128, off, s32 offset:208 +; GFX11-NEXT: scratch_load_u16 v129, off, s32 offset:216 +; GFX11-NEXT: scratch_load_u16 v130, off, s32 offset:224 +; GFX11-NEXT: scratch_load_u16 v131, off, s32 offset:232 +; GFX11-NEXT: scratch_load_u16 v94, off, s32 offset:240 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_u16 v95, off, s32 offset:248 +; GFX11-NEXT: scratch_load_u16 v104, off, s32 offset:256 +; GFX11-NEXT: scratch_load_u16 v105, off, s32 offset:264 +; GFX11-NEXT: scratch_load_u16 v106, off, s32 offset:272 +; GFX11-NEXT: scratch_load_u16 v107, off, s32 offset:280 +; GFX11-NEXT: scratch_load_u16 v108, off, s32 offset:288 +; GFX11-NEXT: scratch_load_u16 v109, off, s32 offset:296 +; GFX11-NEXT: scratch_load_u16 v110, off, s32 offset:304 +; GFX11-NEXT: scratch_load_u16 v111, off, s32 offset:312 +; GFX11-NEXT: scratch_load_u16 v64, off, s32 offset:308 +; GFX11-NEXT: scratch_load_u16 v65, off, s32 
offset:300 +; GFX11-NEXT: scratch_load_u16 v66, off, s32 offset:292 +; GFX11-NEXT: scratch_load_u16 v67, off, s32 offset:284 +; GFX11-NEXT: scratch_load_u16 v68, off, s32 offset:276 +; GFX11-NEXT: scratch_load_u16 v69, off, s32 offset:268 +; GFX11-NEXT: scratch_load_u16 v70, off, s32 offset:260 +; GFX11-NEXT: scratch_load_u16 v71, off, s32 offset:252 +; GFX11-NEXT: scratch_load_u16 v80, off, s32 offset:244 +; GFX11-NEXT: scratch_load_u16 v81, off, s32 offset:236 +; GFX11-NEXT: scratch_load_u16 v82, off, s32 offset:228 +; GFX11-NEXT: scratch_load_u16 v83, off, s32 offset:220 +; GFX11-NEXT: scratch_load_u16 v84, off, s32 offset:212 +; GFX11-NEXT: scratch_load_u16 v85, off, s32 offset:204 +; GFX11-NEXT: scratch_load_u16 v86, off, s32 offset:196 +; GFX11-NEXT: scratch_load_u16 v100, off, s32 offset:188 +; GFX11-NEXT: scratch_load_u16 v101, off, s32 offset:180 +; GFX11-NEXT: scratch_load_u16 v102, off, s32 offset:172 +; GFX11-NEXT: scratch_load_u16 v103, off, s32 offset:164 +; GFX11-NEXT: scratch_load_u16 v112, off, s32 offset:156 +; GFX11-NEXT: scratch_load_u16 v118, off, s32 offset:148 +; GFX11-NEXT: scratch_load_u16 v145, off, s32 offset:140 +; GFX11-NEXT: scratch_load_u16 v146, off, s32 offset:132 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v147, off, s32 offset:124 +; GFX11-NEXT: scratch_load_u16 v148, off, s32 offset:116 +; GFX11-NEXT: scratch_load_u16 v162, off, s32 offset:108 +; GFX11-NEXT: scratch_load_u16 v163, off, s32 offset:100 +; GFX11-NEXT: scratch_load_u16 v164, off, s32 offset:92 +; GFX11-NEXT: scratch_load_u16 v165, off, s32 offset:84 +; GFX11-NEXT: scratch_load_u16 v166, off, s32 offset:76 +; GFX11-NEXT: scratch_load_u16 v180, off, s32 offset:68 +; GFX11-NEXT: scratch_load_u16 v181, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v182, off, s32 offset:52 +; GFX11-NEXT: scratch_load_u16 v183, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v45, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v46, off, s32 offset:28 +; 
GFX11-NEXT: scratch_load_u16 v47, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v56, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v62, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v89, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v90, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v91, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v92, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v93, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v76, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v77, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v78, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v79, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v88, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v63, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v72, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v73, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v74, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v75, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(62) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v57, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v58, 8, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v59, 8, v6 +; GFX11-NEXT: v_lshlrev_b32_e32 v60, 8, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v61, 8, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v40, 8, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v41, 8, v14 +; GFX11-NEXT: v_lshlrev_b32_e32 v42, 8, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v43, 8, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v44, 8, v20 +; GFX11-NEXT: v_lshlrev_b32_e32 v167, 8, v22 +; GFX11-NEXT: v_lshlrev_b32_e32 v176, 8, v24 +; GFX11-NEXT: v_lshlrev_b32_e32 v177, 8, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v178, 8, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v179, 8, v30 +; GFX11-NEXT: v_lshlrev_b32_e32 v149, 8, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v150, 8, v87 +; GFX11-NEXT: s_waitcnt vmcnt(61) +; GFX11-NEXT: v_lshlrev_b32_e32 v151, 8, v96 +; GFX11-NEXT: s_waitcnt vmcnt(60) +; GFX11-NEXT: v_lshlrev_b32_e32 v160, 8, v97 +; GFX11-NEXT: s_waitcnt vmcnt(59) +; GFX11-NEXT: v_lshlrev_b32_e32 v161, 8, v98 +; GFX11-NEXT: s_waitcnt vmcnt(58) +; 
GFX11-NEXT: v_lshlrev_b32_e32 v132, 8, v99 +; GFX11-NEXT: s_waitcnt vmcnt(57) +; GFX11-NEXT: v_lshlrev_b32_e32 v133, 8, v113 +; GFX11-NEXT: s_waitcnt vmcnt(56) +; GFX11-NEXT: v_lshlrev_b32_e32 v134, 8, v114 +; GFX11-NEXT: s_waitcnt vmcnt(55) +; GFX11-NEXT: v_lshlrev_b32_e32 v135, 8, v115 +; GFX11-NEXT: s_waitcnt vmcnt(54) +; GFX11-NEXT: v_lshlrev_b32_e32 v144, 8, v116 +; GFX11-NEXT: s_waitcnt vmcnt(53) +; GFX11-NEXT: v_lshlrev_b32_e32 v119, 8, v117 +; GFX11-NEXT: s_waitcnt vmcnt(52) +; GFX11-NEXT: v_lshlrev_b32_e32 v128, 8, v128 +; GFX11-NEXT: s_waitcnt vmcnt(51) +; GFX11-NEXT: v_lshlrev_b32_e32 v129, 8, v129 +; GFX11-NEXT: s_waitcnt vmcnt(50) +; GFX11-NEXT: v_lshlrev_b32_e32 v130, 8, v130 +; GFX11-NEXT: s_waitcnt vmcnt(49) +; GFX11-NEXT: v_lshlrev_b32_e32 v131, 8, v131 +; GFX11-NEXT: s_waitcnt vmcnt(48) +; GFX11-NEXT: v_lshlrev_b32_e32 v113, 8, v94 +; GFX11-NEXT: s_waitcnt vmcnt(47) +; GFX11-NEXT: v_lshlrev_b32_e32 v114, 8, v95 +; GFX11-NEXT: s_waitcnt vmcnt(46) +; GFX11-NEXT: v_lshlrev_b32_e32 v115, 8, v104 +; GFX11-NEXT: s_waitcnt vmcnt(45) +; GFX11-NEXT: v_lshlrev_b32_e32 v116, 8, v105 +; GFX11-NEXT: s_waitcnt vmcnt(44) +; GFX11-NEXT: v_lshlrev_b32_e32 v117, 8, v106 +; GFX11-NEXT: s_waitcnt vmcnt(43) +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 8, v107 +; GFX11-NEXT: s_waitcnt vmcnt(42) +; GFX11-NEXT: v_lshlrev_b32_e32 v96, 8, v108 +; GFX11-NEXT: s_waitcnt vmcnt(41) +; GFX11-NEXT: v_lshlrev_b32_e32 v97, 8, v109 +; GFX11-NEXT: s_waitcnt vmcnt(40) +; GFX11-NEXT: v_lshlrev_b32_e32 v98, 8, v110 +; GFX11-NEXT: s_waitcnt vmcnt(39) +; GFX11-NEXT: v_lshlrev_b32_e32 v99, 8, v111 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB39_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v54 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v53 +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: v_or_b32_e32 v0, v0, v90 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v91 +; 
GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_and_b32 s11, s26, 0xff +; GFX11-NEXT: v_or_b32_e32 v5, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v50 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v49 +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v76 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v77 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v7, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v48 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v39 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v78 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v79 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v8, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v38 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v37 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v88 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v63 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v9, v0, v1 +; 
GFX11-NEXT: v_and_b32_e32 v0, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v72 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v73 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v10, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v33 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v74 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v75 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v62 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v57 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v58 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v56 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v47 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v59 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v60 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v13, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v46 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v45 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v61 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v40 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v14, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v183 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v182 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v41 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v42 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v15, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v181 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v180 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v43 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v44 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v16, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v166 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v165 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v167 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v176 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v17, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v164 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v163 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v177 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v178 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v18, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v162 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v148 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v179 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v149 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v19, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v147 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v146 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v150 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v151 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v20, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v145 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v118 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v160 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v161 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 
v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v21, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v112 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v103 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v132 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v133 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v22, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v102 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v101 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v134 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v135 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v23, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v100 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v86 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v144 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v119 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v24, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v85 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v84 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v128 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v129 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v25, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v83 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v82 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v130 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v131 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v26, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v81 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v80 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v113 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v114 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v27, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v71 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v70 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v115 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v116 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v28, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v69 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v68 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v117 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v87 
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v29, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v67 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v66 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v96 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v97 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v30, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v65 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v64 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v98 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v99 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_or_b32_e32 v31, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v55 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v89 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_lshl_b32 s6, s6, 16 +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; 
GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_and_b32 s9, s9, 0xffff +; GFX11-NEXT: s_lshl_b32 s10, s10, 16 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v51 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v52 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_or_b32_e32 v3, v3, v93 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v92 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_or_b32_e32 v6, v2, v3 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB39_3 +; GFX11-NEXT: .LBB39_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s0, s0, s1 +; GFX11-NEXT: 
s_and_b32 s1, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s2, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_or_b32 s1, s2, s1 +; GFX11-NEXT: s_and_b32 s2, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s19, 8 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_or_b32 s2, s3, s2 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_and_b32 s3, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s1, s1, s2 +; GFX11-NEXT: s_or_b32 s2, s4, s3 +; GFX11-NEXT: s_and_b32 s3, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s23, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s3, s4, s3 +; GFX11-NEXT: s_and_b32 s4, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s25, 8 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_addk_i32 s3, 0x300 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s2, s2, 0xffff +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s2, s2, s3 +; GFX11-NEXT: s_and_b32 s3, s4, 0xffff +; GFX11-NEXT: s_and_b32 s4, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s27, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v55 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v54 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v52 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v51 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v38 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v33 +; GFX11-NEXT: v_or_b32_e32 v0, v89, v0 +; GFX11-NEXT: 
v_or_b32_e32 v1, v90, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v91, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v92, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v93, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v7, v88, v7 +; GFX11-NEXT: v_or_b32_e32 v11, v74, v11 +; GFX11-NEXT: v_or_b32_e32 v12, v75, v12 +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_or_b32_e32 v6, v3, v6 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v49 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v48 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v39 +; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v37 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v35 +; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: 
v_and_b32_e32 v8, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v12 +; GFX11-NEXT: s_waitcnt vmcnt(3) +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v46 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v181 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v180 +; GFX11-NEXT: v_or_b32_e32 v0, v76, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v77, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v78, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v79, v3 +; GFX11-NEXT: v_or_b32_e32 v7, v63, v7 +; GFX11-NEXT: v_or_b32_e32 v8, v72, v8 +; GFX11-NEXT: v_or_b32_e32 v10, v73, v10 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 +; GFX11-NEXT: v_or_b32_e32 v12, v61, v12 +; GFX11-NEXT: v_or_b32_e32 v16, v43, v16 +; GFX11-NEXT: v_or_b32_e32 v17, v44, v17 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v7 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_or_b32_e32 v7, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v8, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v9, v9, v13 +; GFX11-NEXT: v_or_b32_e32 v10, v14, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v62 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v56 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v47 +; GFX11-NEXT: v_and_b32_e32 
v14, 0xffff, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v45 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 3, v183 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v182 +; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-NEXT: v_or_b32_e32 v16, v16, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v162 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 3, v145 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v118 +; GFX11-NEXT: v_or_b32_e32 v0, v57, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v58, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v59, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v60, v3 +; GFX11-NEXT: v_or_b32_e32 v12, v40, v12 +; GFX11-NEXT: v_or_b32_e32 v13, v41, v13 +; GFX11-NEXT: v_or_b32_e32 v15, v42, v15 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_and_b32_e32 v21, 0xff, v21 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 +; GFX11-NEXT: v_or_b32_e32 v17, v179, v17 +; GFX11-NEXT: v_or_b32_e32 v21, v160, v21 +; GFX11-NEXT: v_or_b32_e32 v22, v161, v22 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v18, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v15, 16, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 
0x300, v21 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_or_b32_e32 v12, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v13, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v18 +; GFX11-NEXT: v_or_b32_e32 v15, v19, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v166 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v165 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v164 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v163 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v148 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 3, v147 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v146 +; GFX11-NEXT: v_and_b32_e32 v21, 0xffff, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18 +; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20 +; GFX11-NEXT: v_or_b32_e32 v21, v21, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v100 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 3, v83 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v82 +; GFX11-NEXT: v_or_b32_e32 v0, v167, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v176, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v177, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v178, v3 +; GFX11-NEXT: v_or_b32_e32 v17, v149, v17 +; GFX11-NEXT: v_or_b32_e32 v18, v150, v18 +; GFX11-NEXT: v_or_b32_e32 v20, v151, v20 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 +; GFX11-NEXT: v_or_b32_e32 v22, v144, v22 +; GFX11-NEXT: v_or_b32_e32 v26, v130, v26 +; 
GFX11-NEXT: v_or_b32_e32 v27, v131, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 16, v17 +; GFX11-NEXT: v_and_b32_e32 v24, 0xffff, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_or_b32_e32 v17, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v18, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v19, v19, v23 +; GFX11-NEXT: v_or_b32_e32 v20, v24, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v112 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v103 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v102 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v101 +; GFX11-NEXT: v_and_b32_e32 v24, 0xffff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v86 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 3, v85 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 3, v84 +; GFX11-NEXT: v_and_b32_e32 v26, 0xffff, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 16, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v23, 0xff, v23 +; GFX11-NEXT: v_and_b32_e32 v25, 0xff, v25 +; GFX11-NEXT: v_or_b32_e32 v26, v26, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v69 +; GFX11-NEXT: v_or_b32_e32 v0, v132, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v133, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v134, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v135, v3 +; GFX11-NEXT: v_or_b32_e32 v22, v119, v22 +; GFX11-NEXT: v_or_b32_e32 v23, v128, v23 +; GFX11-NEXT: v_or_b32_e32 v25, v129, v25 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: 
v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 +; GFX11-NEXT: v_or_b32_e32 v27, v117, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v28, 16, v22 +; GFX11-NEXT: v_and_b32_e32 v29, 0xffff, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_or_b32_e32 v22, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v23, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v24, v24, v28 +; GFX11-NEXT: v_or_b32_e32 v25, v29, v25 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v81 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v80 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v71 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v70 +; GFX11-NEXT: v_and_b32_e32 v29, 0xffff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v68 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 3, v67 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 3, v66 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 3, v65 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 3, v64 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28 +; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30 +; GFX11-NEXT: v_and_b32_e32 v31, 0xff, v31 +; GFX11-NEXT: v_and_b32_e32 v32, 0xff, v32 +; GFX11-NEXT: v_or_b32_e32 v0, v113, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v114, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v115, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v116, v3 +; GFX11-NEXT: v_or_b32_e32 v27, v87, v27 +; GFX11-NEXT: v_or_b32_e32 v28, v96, v28 +; GFX11-NEXT: v_or_b32_e32 v30, v97, v30 +; GFX11-NEXT: v_or_b32_e32 v31, v98, v31 +; GFX11-NEXT: v_or_b32_e32 v32, v99, v32 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; 
GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v33, 16, v27 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v32, 16, v32 +; GFX11-NEXT: v_or_b32_e32 v27, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v28, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v29, v29, v33 +; GFX11-NEXT: v_or_b32_e32 v30, v34, v30 +; GFX11-NEXT: v_or_b32_e32 v31, v31, v32 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB39_3: ; %end +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:320 +; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:324 +; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:328 +; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:332 +; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:336 +; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:340 +; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:344 +; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:348 +; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:352 +; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:356 +; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:360 +; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:364 +; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:368 +; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:372 +; GFX11-NEXT: 
scratch_load_b32 v89, off, s32 offset:376 +; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:380 +; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:384 +; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:388 +; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:392 +; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:396 +; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:400 +; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:404 +; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:408 +; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:412 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:416 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:420 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:424 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:428 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:432 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:436 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:440 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:444 +; GFX11-NEXT: s_clause 0x7 +; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:448 +; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:452 +; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:456 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:460 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:464 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:468 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:472 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:476 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB39_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-NEXT: s_branch .LBB39_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -86868,876 +85142,913 @@ define <16 x i64> 
@bitcast_v128i8_to_v16i64(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v51, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v52, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v53, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v54, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v54, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v55, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:284 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_hi_b16 v64, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 offset:260 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:536 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:532 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:528 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:524 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v88, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:412 +; GFX11-TRUE16-NEXT: s_clause 0x4 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:392 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v49, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v114, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v81, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v83, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v83, off, s32 offset:24 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v85, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v85, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v87, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v87, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v97, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v98, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v100, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v102, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v160, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v160, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v161, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v161, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:152 +; GFX11-TRUE16-NEXT: scratch_load_u16 v31, off, s32 offset:384 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:380 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 offset:376 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:372 +; GFX11-TRUE16-NEXT: scratch_load_u16 v56, off, s32 offset:368 +; GFX11-TRUE16-NEXT: scratch_load_u16 v114, off, s32 offset:364 +; GFX11-TRUE16-NEXT: scratch_load_u16 v33, off, s32 offset:360 +; GFX11-TRUE16-NEXT: scratch_load_u16 v115, off, s32 offset:356 +; GFX11-TRUE16-NEXT: scratch_load_u16 v57, off, s32 offset:352 +; GFX11-TRUE16-NEXT: scratch_load_u16 v116, off, s32 offset:348 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:344 +; GFX11-TRUE16-NEXT: scratch_load_u16 v117, off, s32 offset:340 +; GFX11-TRUE16-NEXT: scratch_load_u16 v58, off, s32 offset:336 +; GFX11-TRUE16-NEXT: scratch_load_u16 v118, off, s32 offset:332 +; 
GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:328 +; GFX11-TRUE16-NEXT: scratch_load_u16 v119, off, s32 offset:324 +; GFX11-TRUE16-NEXT: scratch_load_u16 v59, off, s32 offset:320 +; GFX11-TRUE16-NEXT: scratch_load_u16 v128, off, s32 offset:316 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:312 +; GFX11-TRUE16-NEXT: scratch_load_u16 v129, off, s32 offset:308 +; GFX11-TRUE16-NEXT: scratch_load_u16 v60, off, s32 offset:304 +; GFX11-TRUE16-NEXT: scratch_load_u16 v130, off, s32 offset:300 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:296 +; GFX11-TRUE16-NEXT: scratch_load_u16 v131, off, s32 offset:292 +; GFX11-TRUE16-NEXT: scratch_load_u16 v61, off, s32 offset:288 +; GFX11-TRUE16-NEXT: scratch_load_u16 v132, off, s32 offset:284 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:280 +; GFX11-TRUE16-NEXT: scratch_load_u16 v133, off, s32 offset:276 +; GFX11-TRUE16-NEXT: scratch_load_u16 v62, off, s32 offset:272 +; GFX11-TRUE16-NEXT: scratch_load_u16 v134, off, s32 offset:268 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:264 +; GFX11-TRUE16-NEXT: scratch_load_u16 v135, off, s32 offset:260 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v162, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v163, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v164, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v165, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v55, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v65, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:188 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_hi_b16 v68, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v80, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v81, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v82, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v84, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v86, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v96, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v99, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v101, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v103, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v112, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v113, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v63, off, s32 offset:256 +; GFX11-TRUE16-NEXT: scratch_load_u16 v144, off, s32 offset:252 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:248 +; GFX11-TRUE16-NEXT: scratch_load_u16 v145, off, s32 offset:244 +; GFX11-TRUE16-NEXT: scratch_load_u16 v72, off, s32 offset:240 +; GFX11-TRUE16-NEXT: scratch_load_u16 v146, off, s32 offset:236 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:232 +; GFX11-TRUE16-NEXT: scratch_load_u16 v147, off, s32 offset:228 +; GFX11-TRUE16-NEXT: scratch_load_u16 v73, off, 
s32 offset:224 +; GFX11-TRUE16-NEXT: scratch_load_u16 v148, off, s32 offset:220 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:216 +; GFX11-TRUE16-NEXT: scratch_load_b32 v51, off, s32 offset:388 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v74, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v75, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v76, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v77, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_u16 v78, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v79, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v88, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v89, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v90, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v91, off, s32 offset:136 +; GFX11-TRUE16-NEXT: scratch_load_u16 v92, off, s32 offset:144 +; GFX11-TRUE16-NEXT: scratch_load_u16 v93, off, s32 offset:152 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_u16 v94, off, s32 offset:160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v95, off, s32 offset:168 +; GFX11-TRUE16-NEXT: scratch_load_u16 v104, off, s32 offset:176 +; GFX11-TRUE16-NEXT: scratch_load_u16 v105, off, s32 offset:184 +; GFX11-TRUE16-NEXT: scratch_load_u16 v106, off, s32 offset:192 +; GFX11-TRUE16-NEXT: scratch_load_u16 v107, off, s32 offset:200 +; GFX11-TRUE16-NEXT: scratch_load_u16 v108, off, s32 offset:208 +; GFX11-TRUE16-NEXT: scratch_load_u16 v149, 
off, s32 offset:212 +; GFX11-TRUE16-NEXT: scratch_load_u16 v150, off, s32 offset:204 +; GFX11-TRUE16-NEXT: scratch_load_u16 v151, off, s32 offset:196 +; GFX11-TRUE16-NEXT: scratch_load_u16 v160, off, s32 offset:188 +; GFX11-TRUE16-NEXT: scratch_load_u16 v161, off, s32 offset:180 +; GFX11-TRUE16-NEXT: scratch_load_u16 v162, off, s32 offset:172 +; GFX11-TRUE16-NEXT: scratch_load_u16 v163, off, s32 offset:164 +; GFX11-TRUE16-NEXT: scratch_load_u16 v164, off, s32 offset:156 +; GFX11-TRUE16-NEXT: scratch_load_u16 v165, off, s32 offset:148 +; GFX11-TRUE16-NEXT: scratch_load_u16 v166, off, s32 offset:140 +; GFX11-TRUE16-NEXT: scratch_load_u16 v167, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v176, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v177, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v178, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v179, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v180, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v181, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v182, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v183, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v40, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v41, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v42, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v43, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v44, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v45, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v115, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v117.l, v30.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v118.h, v28.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v119.l, v26.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v119.h, v24.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v130.l, v22.l -; 
GFX11-TRUE16-NEXT: v_mov_b16_e64 v130.h, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v131.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v132.h, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.l, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.h, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v144.l, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v145.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v146.h, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v146.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.l, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.l, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.h, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.h, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.l, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.h, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.l, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v145.l, 8, v19.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.l, 8, v21.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.h, 8, v23.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.l, 8, v25.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.h, 8, v27.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.h, 8, v29.l +; GFX11-TRUE16-NEXT: scratch_load_u16 v46, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v47, off, s32 offset:4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.l, v30.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v70.h, v28.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v71.l, v26.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v71.h, v24.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v82.l, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v82.h, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v83.l, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v84.h, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v86.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v86.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v96.l, v10.l +; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v97.h, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v98.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v98.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.l, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v19.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v21.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v23.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v25.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v27.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v29.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v31.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v32.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v56.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v33.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v57.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v34.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v58.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v35.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.h, 8, v59.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.l, 8, v36.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.h, 8, v60.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v37.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v61.l +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v38.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v62.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, v63.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v48.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.h, 8, v72.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v49.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v73.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v50.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v114 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v51 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.l, 8, v81.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.l, 8, v52.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.l, 8, v82.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v53.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.h, 8, v83.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v54.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.l, 8, v83.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.l, 8, v55.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.h, 8, v85.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.h, 8, v64.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.h, 8, v85.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.h, 8, v65.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.l, 8, v87.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v66.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.l, 8, v87.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v67.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.h, 8, v97.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 
v68.h, 8, v74.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.l, 8, v97.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.l, 8, v75.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.h, 8, v98.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v76.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.h, 8, v99.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v77.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.l, 8, v100.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v78.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v101.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.l, 8, v79.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v102.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.h, 8, v88.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(38) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v160.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v89.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.l, 8, v160.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v90.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(36) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v161.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v91.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v161.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v92.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(34) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v162.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v93.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v162.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v94.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v163.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v95.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 
v87.h, 8, v163.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v104.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(30) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v164.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v105.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v164.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v106.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(28) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v165.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v107.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v165.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v71.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.l, 8, v71.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.l, 8, v70.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.h, 8, v70.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v68.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.h, 8, v67.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v66.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v66.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v64.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v64.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v55.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v54.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v54.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v53.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v53.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v52.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v52.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v51.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v51.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v50.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v31.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v108.l ; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB58_3 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow -; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB58_4 -; GFX11-TRUE16-NEXT: .LBB58_2: ; %end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB58_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v149.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.h, 0 -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v146.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v146.l -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v0.l, v151.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v151.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v0.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v150.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v3.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v145.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v144.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v149, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v1.h, v150.l -; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v132.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v131.l -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v148.h -; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v130.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v149, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v2.l, v148.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v2.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v134.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v134.l -; 
GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v145.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v130.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v149, v2 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v3.l, v147.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v147.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v3.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v135.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v119.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v119.l -; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v118.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v149, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v4.l, v144.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v4.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v133.h -; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v117.l +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB58_2 +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v101.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.h, 0 +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v98.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v98.l +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v0.l, v103.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v103.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v102.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v97.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v96.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v101, v0 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v1.h, v102.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v84.h +; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v83.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v100.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v82.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v101, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v2.l, v100.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v86.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v86.l +; 
GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v97.l +; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v82.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v101, v2 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v3.l, v99.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v99.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v87.h +; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v71.h +; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v70.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v101, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v4.l, v96.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v85.h +; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v69.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v115.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v115.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v149, v4 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v5.l, v135.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v5.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v132.l -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v129.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v113.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v112.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v149, v5 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v6.l, v133.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v6.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v103.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v128.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v103.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v101.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v149, v6 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v7.l, v131.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v7.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v118.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v100.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v99.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v98.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v149, v7 -; 
GFX11-TRUE16-NEXT: v_or_b16 v149.l, v8.l, v129.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v8.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v116.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v114.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v96.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v96.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v149, v8 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v9.l, v128.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v9.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v86.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v113.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v86.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v84.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v149, v9 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v10.l, v117.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v10.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v102.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v84.l -; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v82.h -; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v81.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v149, v10 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v11.l, v116.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v11.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v101.l -; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v99.l -; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v80.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v80.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v149, v11 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v12.l, v114.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v12.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v69.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v97.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v69.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v68.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v149, v12 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v13.l, v112.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v13.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v87.h -; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v67.l -; 
GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v65.h -; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v65.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v149, v13 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v14.l, v102.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v14.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v85.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v83.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v50.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v149, v14 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v15.l, v100.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v15.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v82.l -; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v149, v15 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v16.l, v98.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v16.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v71.h -; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v48.l -; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v39.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v149, v16 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v17.l, v97.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v17.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v70.h -; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v68.l -; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v18.l, v87.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v18.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v66.h -; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v149, v18 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v19.l, v85.l -; 
GFX11-TRUE16-NEXT: v_mov_b16_e64 v19.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v149, v19 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v20.l, v83.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v20.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v55.l -; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v54.l -; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v149, v20 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v21.l, v81.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v21.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v53.l -; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v149, v21 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v22.l, v71.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v22.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v52.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_hi16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v47.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v46.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v101, v4 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v5.l, v87.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v84.l +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v81.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v45.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v44.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v101, v5 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v6.l, v85.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v43.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v42.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v41.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v101, v6 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v7.l, v83.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v70.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v40.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v183.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v182.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v101, v7 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v8.l, v81.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v68.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v67.h +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v181.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v180.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v101, v8 +; GFX11-TRUE16-NEXT: v_or_b16 
v101.l, v9.l, v80.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v179.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v66.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v178.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v177.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v101, v9 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v10.l, v69.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v65.h +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v176.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v167.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v166.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v101, v10 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v11.l, v68.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v64.h +; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v55.h +; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v165.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v164.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v101, v11 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v12.l, v67.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v163.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v54.h +; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v162.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v161.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v101, v12 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v13.l, v66.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v53.h +; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v160.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v151.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v150.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v101, v13 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v14.l, v65.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v52.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v51.h +; GFX11-TRUE16-NEXT: v_and_b16 
v21.l, 0xff, v149.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v148.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v101, v14 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v15.l, v64.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v147.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v50.h +; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v146.l +; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v145.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v101, v15 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v16.l, v55.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v49.h +; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v144.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v135.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v134.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v101, v16 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v17.l, v54.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v48.h +; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v133.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v132.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v101, v17 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v18.l, v53.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v131.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v38.h +; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v130.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v129.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v101, v18 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v19.l, v52.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v37.h +; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v128.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v119.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v118.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v101, v19 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v20.l, v51.l +; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v20.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v35.h +; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v117.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v116.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v101, v20 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v21.l, v50.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v115.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v34.h +; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v114.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v113.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v101, v21 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v22.l, v49.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v112.l ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr65_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_hi16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr160 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr161 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr162 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr163 +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr164 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr165 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr167 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr176 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr177 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr178 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr179 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr180 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr181 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr182 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr183 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr40 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr41 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr42 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr43 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr44 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr46 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr47 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 ; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v149, v22 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v23.l, v70.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v23.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v51.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v101, v22 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v23.l, v48.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v32.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v149, v23 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v24.l, v67.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v24.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v149, v24 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v25.l, v66.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v25.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v101, v23 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v24.l, v39.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v101, v24 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v25.l, v38.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v149, v25 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v26.l, v64.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v26.l, v149.h -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v149, v26 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v27.l, v54.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v27.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v101, v25 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v26.l, v37.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v101, v26 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v27.l, v36.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v149, v27 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v28.l, v53.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v28.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v149, v28 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v29.l, v52.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v29.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v101, v27 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v28.l, v35.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v101, v28 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v29.l, v34.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v149, v29 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v30.l, v51.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v149, v30 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v31.l, v50.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 
v31.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v101, v29 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v30.l, v33.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v101, v30 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v31.l, v32.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v149, v31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v101, v31 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 +; GFX11-TRUE16-NEXT: .LBB58_2: ; %Flow ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB58_2 -; GFX11-TRUE16-NEXT: .LBB58_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v149.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v149.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v146.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v146.l, 3 +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB58_4 +; GFX11-TRUE16-NEXT: ; %bb.3: ; %cmp.true +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v101.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v101.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v98.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v98.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v151.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v151.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v150.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v150.l, v1.h -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v145.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v103.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v103.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v102.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v102.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v97.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v144.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v96.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v134.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v86.h, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v31, v3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v134.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v86.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v148.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v100.l, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v148.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v100.h, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v31, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v31.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v3.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v147.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v147.h, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v132.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v131.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v99.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v99.h, v2.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v84.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v83.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v31, v5 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v3.l ; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v4.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v130.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v130.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v82.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v82.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v144.h, v3.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v145.l, v3.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v96.h, v3.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v97.l, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v4.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v31, v6 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v135.l, v4.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v135.h, v4.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v119.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v119.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v87.l, v4.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v87.h, v4.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v71.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v71.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v31, v7 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v6.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v118.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v70.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v69.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v133.l, v5.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v133.h, v5.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v85.l, v5.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v85.h, v5.h ; 
GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v6.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v31, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v131.h, v6.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v132.l, v6.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v83.h, v6.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v84.l, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v115.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v47.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v46.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v31, v9 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v113.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v45.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v44.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v129.l, v7.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v129.h, v7.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v81.l, v7.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v81.h, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v8.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v31, v10 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v128.l, v8.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v128.h, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v103.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v103.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v80.l, v8.l +; GFX11-TRUE16-NEXT: 
v_or_b16 v9.h, v80.h, v8.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v43.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v42.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v31, v11 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v101.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v100.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v41.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v40.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v117.h, v9.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v118.l, v9.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v69.h, v9.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v70.l, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v10.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v31, v12 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v116.l, v10.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v116.h, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v99.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v98.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v68.l, v10.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v68.h, v10.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v183.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v182.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v31, v13 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v96.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v96.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v181.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 
v12.h, v180.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v114.l, v11.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v114.h, v11.h +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v67.l, v11.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v67.h, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v12.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v31, v14 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v112.h, v12.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v113.l, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v86.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v86.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v66.l, v12.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v66.h, v12.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v179.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v178.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v31, v15 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v84.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v84.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v177.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v176.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v102.l, v13.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v102.h, v13.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v65.l, v13.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v65.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v14.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v31, v16 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v15.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 
v17.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v100.h, v14.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v101.l, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v82.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v81.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v64.l, v14.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v64.h, v14.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v167.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v166.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v31, v17 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v15.h ; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v16.l ; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v16.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v80.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v80.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v165.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v164.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v98.h, v15.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v99.l, v15.h +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v55.l, v15.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v55.h, v15.h ; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v16.l ; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v16.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v31, v18 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v17.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.h, 0x300, v17.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v97.l, v16.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v97.h, v16.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v69.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v69.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v54.l, v16.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v54.h, v16.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v163.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v162.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v31, v19 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v17.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, 
0x300, v17.h ; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v18.l ; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v18.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v68.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v161.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v160.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v87.l, v17.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v87.h, v17.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v53.l, v17.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v53.h, v17.h ; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v18.l ; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v18.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v31, v20 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v19.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.h, 0x300, v19.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v85.l, v18.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v85.h, v18.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v65.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v65.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v52.l, v18.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v52.h, v18.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v151.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v150.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v31, v21 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v19.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, 0x300, v19.h ; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v20.l ; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v20.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v50.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v149.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v148.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v83.l, v19.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v83.h, v19.h +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v51.l, v19.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v51.h, v19.h 
; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v20.l ; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v20.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v31, v22 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v21.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.h, 0x300, v21.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v81.h, v20.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v82.l, v20.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v50.l, v20.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v50.h, v20.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v147.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v146.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v31, v23 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v21.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, 0x300, v21.h ; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v22.l ; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v22.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v145.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v144.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v71.l, v21.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v71.h, v21.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v49.l, v21.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v49.h, v21.h ; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v22.l ; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v22.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v31, v24 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v23.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.h, 0x300, v23.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v70.l, v22.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v70.h, v22.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v39.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v39.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v48.l, v22.l +; 
GFX11-TRUE16-NEXT: v_or_b16 v23.h, v48.h, v22.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v135.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v134.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v31, v25 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v23.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, 0x300, v23.h ; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v24.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v133.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v132.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v67.h, v23.l -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v68.l, v23.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v39.l, v23.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v39.h, v23.h ; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v24.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v31, v26 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v25.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.h, 0x300, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v66.l, v24.l -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v66.h, v24.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v37.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v37.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v38.l, v24.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v38.h, v24.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v131.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v130.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v31, v27 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v25.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, 0x300, v25.h ; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v26.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v36.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v129.l, 3 +; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v128.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v64.l, v25.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v64.h, v25.h +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v37.l, v25.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v37.h, v25.h ; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v26.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v31, v28 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v27.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.h, 0x300, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v54.h, v26.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v55.l, v26.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v35.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v36.l, v26.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v36.h, v26.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v119.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v118.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v31, v29 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v27.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, 0x300, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v28.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v34.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v116.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v53.h, v27.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v54.l, v27.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v35.l, v27.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v35.h, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v28.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v31, v30 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v29.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v34.h, 0x300, v29.h -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v52.h, v28.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v53.l, v28.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v33.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v31, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v35.h, 0x300, v29.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v34.l, v28.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v34.h, v28.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v114.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v31, v35 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v29.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v29.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v34.h, 0x300, v29.h ; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v30.l ; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v30.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v51.h, v29.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v52.l, v29.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v113.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v33.l, v29.l +; GFX11-TRUE16-NEXT: v_or_b16 v33.h, v33.h, v29.h ; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v30.l ; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v30.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v31, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v32.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v32.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v50.h, v30.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v51.l, v30.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v31, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v33.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v33.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v31.h 
+; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v32.l, v30.l +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v32.h, v30.h ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v31, v33 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v32.l @@ -87745,7 +86056,48 @@ define <16 x i64> @bitcast_v128i8_to_v16i64(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_add_nc_u16 v32.h, 0x300, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v31.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v32 +; GFX11-TRUE16-NEXT: .LBB58_4: ; %end ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:392 +; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, 
s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:516 +; GFX11-TRUE16-NEXT: s_clause 0x4 +; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:524 +; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:528 +; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:532 +; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:536 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v16i64: @@ -92011,1887 +90363,946 @@ define inreg <16 x i64> @bitcast_v128i8_to_v16i64_scalar(<128 x i8> inreg %a, i3 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: bitcast_v128i8_to_v16i64_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v46, s32 offset:452 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 -; GFX11-TRUE16-NEXT: s_clause 0x7 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 
offset:332 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v2, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v16, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v18, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v20, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v22, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v24, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v26, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v28, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v30, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v87, off, 
s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v113, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v116, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v117, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v128, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v130, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v94, off, s32 offset:240 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v95, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v104, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v105, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v106, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v107, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v108, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v109, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v110, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v111, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:276 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v69, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v81, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v83, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v102, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v112, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v146, off, s32 offset:132 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v147, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v148, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v166, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v180, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v181, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v182, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v183, off, s32 offset:44 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v45, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v46, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v47, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v56, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v62, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v89, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v90, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v91, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v92, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v93, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v76, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v77, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v78, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v79, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v88, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v63, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v72, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v73, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v74, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v75, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v57, 8, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v58, 8, v4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v59, 8, v6 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v60, 8, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v61, 8, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v40, 8, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v41, 8, v14 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v42, 8, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v43, 8, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v44, 8, v20 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v167, 8, v22 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v176, 8, v24 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v177, 8, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v178, 8, v28 -; GFX11-TRUE16-NEXT: 
v_lshlrev_b32_e32 v179, 8, v30 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v149, 8, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v150, 8, v87 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v151, 8, v96 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v160, 8, v97 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v161, 8, v98 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v132, 8, v99 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v133, 8, v113 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v134, 8, v114 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v135, 8, v115 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v144, 8, v116 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v119, 8, v117 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v128, 8, v128 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v129, 8, v129 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v130, 8, v130 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v131, 8, v131 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v113, 8, v94 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v114, 8, v95 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v115, 8, v104 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v116, 8, v105 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v117, 8, v106 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 8, v107 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; 
GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v96, 8, v108 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v97, 8, v109 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v98, 8, v110 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v99, 8, v111 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB59_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v54 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v53 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v90 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v91 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v50 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v49 -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v76 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v77 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v48 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v39 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v88 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v63 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v72 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v73 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v33 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v74 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v75 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v62 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v57 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v58 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v56 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v47 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v60 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v46 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v45 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v61 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v0, v1 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v41 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v42 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v43 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v166 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v165 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v167 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v176 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v164 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v163 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v162 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v148 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v179 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v149 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v147 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v150 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v151 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v160 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v161 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v112 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v103 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v132 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v133 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v102 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v101 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v134 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v135 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v144 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v119 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v85 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v84 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v128 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v129 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v130 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v81 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v80 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v113 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v114 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v115 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v116 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v68 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v117 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v87 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v67 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v96 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v97 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v65 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v98 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v99 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v55 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s9, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s10, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v51 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v52 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v93 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v92 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v2, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB59_3 -; GFX11-TRUE16-NEXT: .LBB59_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s4, s3 -; 
GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v55 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v54 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v52 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v51 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v38 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v89, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v90, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v92, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v93, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; 
GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v88, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v74, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v75, v12 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v3, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v49 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v48 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v39 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v37 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 3, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v12, 3, v46 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 3, v181 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v77, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v78, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v79, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v63, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v72, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v73, v10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v61, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v43, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v44, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v9, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v14, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v32 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v62 -; 
GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v56 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v45 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 3, v183 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 3, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v13 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v16, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v162 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 3, v145 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v118 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v57, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v58, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v59, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v60, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v40, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v41, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v42, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xff, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v179, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v160, v21 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v161, v22 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v18, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v18 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v19, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v166 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v165 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v164 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v163 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v148 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 3, v147 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v146 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xff, v20 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v21, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v100 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 3, v83 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v167, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v176, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v177, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v178, v3 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v150, v18 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v151, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v144, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v130, v26 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v131, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v19, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v24, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v112 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v103 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v102 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v101 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v86 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 3, v85 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 3, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 
0xffff, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v23, 0xff, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v25, 0xff, v25 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v26, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v69 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v132, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v133, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v134, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v135, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v119, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v128, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v129, v25 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v117, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xffff, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v24, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v29, v25 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v81 -; 
GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v80 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v71 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v70 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xffff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v68 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 3, v67 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 3, v66 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 3, v65 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 3, v64 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v28 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xff, v31 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xff, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v113, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v114, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v115, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v116, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v87, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v96, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v97, v30 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v98, v31 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v32, v99, v32 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; 
GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v33, 16, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v28 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v32, 16, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v29, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v34, v30 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB59_3: ; %end -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:392 -; 
GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:444 -; GFX11-TRUE16-NEXT: s_clause 0x7 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:448 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:452 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:456 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:460 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:464 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:468 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:472 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:476 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB59_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: s_branch .LBB59_2 -; -; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v16i64_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v40, s32 offset:476 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 -; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 -; GFX11-FAKE16-NEXT: s_clause 0x7 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-FAKE16-NEXT: scratch_load_u16 v2, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v16, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v18, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_u16 v20, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_u16 v22, off, s32 offset:80 -; 
GFX11-FAKE16-NEXT: scratch_load_u16 v24, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_u16 v26, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_u16 v28, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_u16 v30, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_u16 v31, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_u16 v87, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_u16 v96, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_u16 v97, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_u16 v98, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_u16 v99, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_u16 v113, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_u16 v114, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_u16 v115, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_u16 v116, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_u16 v117, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_u16 v128, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_u16 v129, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_u16 v130, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_u16 v131, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_u16 v94, off, s32 offset:240 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_u16 v95, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_u16 v104, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_u16 v105, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_u16 v106, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_u16 v107, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_u16 v108, off, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_load_u16 v109, off, s32 offset:296 -; GFX11-FAKE16-NEXT: scratch_load_u16 v110, off, s32 offset:304 -; GFX11-FAKE16-NEXT: scratch_load_u16 v111, off, s32 offset:312 -; GFX11-FAKE16-NEXT: scratch_load_u16 v64, off, s32 offset:308 -; GFX11-FAKE16-NEXT: 
scratch_load_u16 v65, off, s32 offset:300 -; GFX11-FAKE16-NEXT: scratch_load_u16 v66, off, s32 offset:292 -; GFX11-FAKE16-NEXT: scratch_load_u16 v67, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_u16 v68, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_u16 v69, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_u16 v70, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_u16 v71, off, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_load_u16 v80, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_u16 v81, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_u16 v82, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_u16 v83, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_u16 v84, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_u16 v85, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_u16 v86, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_u16 v100, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_u16 v101, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_u16 v102, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_u16 v103, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_u16 v112, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_u16 v118, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_u16 v145, off, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_load_u16 v146, off, s32 offset:132 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v147, off, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_load_u16 v148, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_u16 v162, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_u16 v163, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_u16 v164, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_u16 v165, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_u16 v166, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_u16 v180, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_u16 v181, off, s32 
offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v182, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_u16 v183, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v45, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v46, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v47, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v56, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v62, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v89, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v90, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v91, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v92, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v93, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v76, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v77, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v78, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v79, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v88, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v63, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v72, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v73, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v74, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v75, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v57, 8, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v58, 8, v4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v59, 8, v6 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v60, 8, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v61, 8, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v40, 8, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v41, 8, v14 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v42, 8, v16 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v43, 8, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v44, 8, v20 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v167, 8, v22 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v176, 8, v24 -; 
GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v177, 8, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v178, 8, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v179, 8, v30 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v149, 8, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v150, 8, v87 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v151, 8, v96 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v160, 8, v97 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v161, 8, v98 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 8, v99 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v133, 8, v113 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 8, v114 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v135, 8, v115 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v144, 8, v116 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v119, 8, v117 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v128, 8, v128 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v129, 8, v129 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v130, 8, v130 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v131, 8, v131 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v113, 8, v94 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v114, 8, v95 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v115, 8, v104 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v116, 8, v105 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v117, 8, v106 -; 
GFX11-FAKE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 8, v107 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v96, 8, v108 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 8, v109 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 8, v110 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v99, 8, v111 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB59_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v54 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v53 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v90 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v91 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v49 -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v76 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v77 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v88 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v63 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v72 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v73 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v33 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, 
v0, v74 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v75 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v62 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v57 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v58 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v56 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v47 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v60 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v46 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v45 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v61 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v41 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v42 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v43 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v166 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v165 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v167 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v176 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v164 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, 
v163 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v177 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v178 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v162 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v148 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v179 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v149 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v147 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v150 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v151 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v160 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v161 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v112 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v103 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v132 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v133 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v102 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v101 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v134 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v135 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v144 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v119 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v24, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v85 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v84 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v128 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v129 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v130 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v81 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v80 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v113 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v114 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v115 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v1, v1, v116 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v68 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v117 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v87 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v67 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v96 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v97 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v98 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v99 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 
-; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v55 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s9, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s10, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v51 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v52 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v93 -; 
GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v92 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v2, v3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB59_3 -; GFX11-FAKE16-NEXT: .LBB59_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 8 
-; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v55 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v54 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v52 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v51 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v38 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v89, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v90, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v92, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v93, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v88, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v74, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v75, v12 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v3, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v49 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v48 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v39 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 3, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v11, v11, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v46 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 3, v181 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v77, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v78, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v79, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v63, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v72, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v73, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v61, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v43, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v44, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v9, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v14, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v32 -; 
GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v62 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v56 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v45 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 3, v183 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 3, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v16, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v162 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 3, v145 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v118 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v57, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v58, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v59, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v60, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v40, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v41, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v42, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xff, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v179, v17 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v21, v160, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v161, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v18, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v18 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v19, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v166 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v165 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v164 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v163 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v148 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 3, v147 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v146 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xff, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v21, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v100 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 3, v83 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v167, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v176, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 
v2, v177, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v178, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v150, v18 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v151, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v144, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v130, v26 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v131, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, v19, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v24, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v112 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v103 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v102 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v101 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v86 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 3, v85 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v25, 3, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xff, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xff, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v26, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v69 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v132, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v133, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v134, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v135, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v119, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v128, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v129, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v117, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v24, v24, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 
v25, v29, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v81 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v80 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v71 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v70 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v68 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 3, v67 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 3, v66 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 3, v65 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 3, v64 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xff, v28 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xff, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xff, v31 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v113, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v114, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v115, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v116, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v87, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v96, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v97, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v98, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, v99, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 
-; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v33, 16, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, v29, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v34, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v31, v32 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB59_3: ; %end -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v111, off, s32 offset:320 -; GFX11-FAKE16-NEXT: scratch_load_b32 v110, off, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_load_b32 v109, off, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_load_b32 v108, off, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_load_b32 v107, off, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_load_b32 v106, off, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_load_b32 v105, off, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_load_b32 v104, off, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_load_b32 v95, off, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:388 -; 
GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:444 -; GFX11-FAKE16-NEXT: s_clause 0x7 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:448 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:452 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:456 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:460 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:464 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:468 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:472 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:476 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB59_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: s_branch .LBB59_2 +; GFX11-LABEL: bitcast_v128i8_to_v16i64_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 
0x1f +; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:352 +; GFX11-NEXT: s_clause 0x7 +; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-NEXT: scratch_store_b32 off, v105, s32 
offset:344 +; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:320 +; GFX11-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 +; GFX11-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 +; GFX11-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 +; GFX11-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 +; GFX11-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 +; GFX11-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 +; GFX11-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 +; GFX11-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:316 +; GFX11-NEXT: scratch_load_u16 v2, off, s32 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v16, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v18, off, s32 offset:64 +; GFX11-NEXT: scratch_load_u16 v20, off, s32 offset:72 +; GFX11-NEXT: scratch_load_u16 v22, off, s32 offset:80 +; GFX11-NEXT: scratch_load_u16 v24, off, s32 offset:88 +; GFX11-NEXT: scratch_load_u16 v26, off, s32 offset:96 +; GFX11-NEXT: scratch_load_u16 v28, off, s32 offset:104 +; GFX11-NEXT: scratch_load_u16 v30, off, s32 offset:112 +; GFX11-NEXT: scratch_load_u16 v31, off, s32 offset:120 +; GFX11-NEXT: scratch_load_u16 v87, off, s32 offset:128 +; GFX11-NEXT: scratch_load_u16 v96, off, s32 offset:136 +; GFX11-NEXT: 
scratch_load_u16 v97, off, s32 offset:144 +; GFX11-NEXT: scratch_load_u16 v98, off, s32 offset:152 +; GFX11-NEXT: scratch_load_u16 v99, off, s32 offset:160 +; GFX11-NEXT: scratch_load_u16 v113, off, s32 offset:168 +; GFX11-NEXT: scratch_load_u16 v114, off, s32 offset:176 +; GFX11-NEXT: scratch_load_u16 v115, off, s32 offset:184 +; GFX11-NEXT: scratch_load_u16 v116, off, s32 offset:192 +; GFX11-NEXT: scratch_load_u16 v117, off, s32 offset:200 +; GFX11-NEXT: scratch_load_u16 v128, off, s32 offset:208 +; GFX11-NEXT: scratch_load_u16 v129, off, s32 offset:216 +; GFX11-NEXT: scratch_load_u16 v130, off, s32 offset:224 +; GFX11-NEXT: scratch_load_u16 v131, off, s32 offset:232 +; GFX11-NEXT: scratch_load_u16 v94, off, s32 offset:240 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_u16 v95, off, s32 offset:248 +; GFX11-NEXT: scratch_load_u16 v104, off, s32 offset:256 +; GFX11-NEXT: scratch_load_u16 v105, off, s32 offset:264 +; GFX11-NEXT: scratch_load_u16 v106, off, s32 offset:272 +; GFX11-NEXT: scratch_load_u16 v107, off, s32 offset:280 +; GFX11-NEXT: scratch_load_u16 v108, off, s32 offset:288 +; GFX11-NEXT: scratch_load_u16 v109, off, s32 offset:296 +; GFX11-NEXT: scratch_load_u16 v110, off, s32 offset:304 +; GFX11-NEXT: scratch_load_u16 v111, off, s32 offset:312 +; GFX11-NEXT: scratch_load_u16 v64, off, s32 offset:308 +; GFX11-NEXT: scratch_load_u16 v65, off, s32 offset:300 +; GFX11-NEXT: scratch_load_u16 v66, off, s32 offset:292 +; GFX11-NEXT: scratch_load_u16 v67, off, s32 offset:284 +; GFX11-NEXT: scratch_load_u16 v68, off, s32 offset:276 +; GFX11-NEXT: scratch_load_u16 v69, off, s32 offset:268 +; GFX11-NEXT: scratch_load_u16 v70, off, s32 offset:260 +; GFX11-NEXT: scratch_load_u16 v71, off, s32 offset:252 +; GFX11-NEXT: scratch_load_u16 v80, off, s32 offset:244 +; GFX11-NEXT: scratch_load_u16 v81, off, s32 offset:236 +; GFX11-NEXT: scratch_load_u16 v82, off, s32 offset:228 +; GFX11-NEXT: scratch_load_u16 v83, off, s32 offset:220 +; GFX11-NEXT: 
scratch_load_u16 v84, off, s32 offset:212 +; GFX11-NEXT: scratch_load_u16 v85, off, s32 offset:204 +; GFX11-NEXT: scratch_load_u16 v86, off, s32 offset:196 +; GFX11-NEXT: scratch_load_u16 v100, off, s32 offset:188 +; GFX11-NEXT: scratch_load_u16 v101, off, s32 offset:180 +; GFX11-NEXT: scratch_load_u16 v102, off, s32 offset:172 +; GFX11-NEXT: scratch_load_u16 v103, off, s32 offset:164 +; GFX11-NEXT: scratch_load_u16 v112, off, s32 offset:156 +; GFX11-NEXT: scratch_load_u16 v118, off, s32 offset:148 +; GFX11-NEXT: scratch_load_u16 v145, off, s32 offset:140 +; GFX11-NEXT: scratch_load_u16 v146, off, s32 offset:132 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v147, off, s32 offset:124 +; GFX11-NEXT: scratch_load_u16 v148, off, s32 offset:116 +; GFX11-NEXT: scratch_load_u16 v162, off, s32 offset:108 +; GFX11-NEXT: scratch_load_u16 v163, off, s32 offset:100 +; GFX11-NEXT: scratch_load_u16 v164, off, s32 offset:92 +; GFX11-NEXT: scratch_load_u16 v165, off, s32 offset:84 +; GFX11-NEXT: scratch_load_u16 v166, off, s32 offset:76 +; GFX11-NEXT: scratch_load_u16 v180, off, s32 offset:68 +; GFX11-NEXT: scratch_load_u16 v181, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v182, off, s32 offset:52 +; GFX11-NEXT: scratch_load_u16 v183, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v45, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v46, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v47, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v56, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v62, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v89, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v90, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v91, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v92, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v93, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v76, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v77, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v78, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v79, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v88, 8, 
v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v63, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v72, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v73, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v74, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v75, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(62) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v57, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v58, 8, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v59, 8, v6 +; GFX11-NEXT: v_lshlrev_b32_e32 v60, 8, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v61, 8, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v40, 8, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v41, 8, v14 +; GFX11-NEXT: v_lshlrev_b32_e32 v42, 8, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v43, 8, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v44, 8, v20 +; GFX11-NEXT: v_lshlrev_b32_e32 v167, 8, v22 +; GFX11-NEXT: v_lshlrev_b32_e32 v176, 8, v24 +; GFX11-NEXT: v_lshlrev_b32_e32 v177, 8, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v178, 8, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v179, 8, v30 +; GFX11-NEXT: v_lshlrev_b32_e32 v149, 8, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v150, 8, v87 +; GFX11-NEXT: s_waitcnt vmcnt(61) +; GFX11-NEXT: v_lshlrev_b32_e32 v151, 8, v96 +; GFX11-NEXT: s_waitcnt vmcnt(60) +; GFX11-NEXT: v_lshlrev_b32_e32 v160, 8, v97 +; GFX11-NEXT: s_waitcnt vmcnt(59) +; GFX11-NEXT: v_lshlrev_b32_e32 v161, 8, v98 +; GFX11-NEXT: s_waitcnt vmcnt(58) +; GFX11-NEXT: v_lshlrev_b32_e32 v132, 8, v99 +; GFX11-NEXT: s_waitcnt vmcnt(57) +; GFX11-NEXT: v_lshlrev_b32_e32 v133, 8, v113 +; GFX11-NEXT: s_waitcnt vmcnt(56) +; GFX11-NEXT: v_lshlrev_b32_e32 v134, 8, v114 +; GFX11-NEXT: s_waitcnt vmcnt(55) +; GFX11-NEXT: v_lshlrev_b32_e32 v135, 8, v115 +; GFX11-NEXT: s_waitcnt vmcnt(54) +; GFX11-NEXT: v_lshlrev_b32_e32 v144, 8, v116 +; GFX11-NEXT: s_waitcnt vmcnt(53) +; GFX11-NEXT: v_lshlrev_b32_e32 v119, 8, v117 +; GFX11-NEXT: s_waitcnt vmcnt(52) +; GFX11-NEXT: v_lshlrev_b32_e32 v128, 8, v128 +; GFX11-NEXT: s_waitcnt vmcnt(51) +; GFX11-NEXT: v_lshlrev_b32_e32 v129, 
8, v129 +; GFX11-NEXT: s_waitcnt vmcnt(50) +; GFX11-NEXT: v_lshlrev_b32_e32 v130, 8, v130 +; GFX11-NEXT: s_waitcnt vmcnt(49) +; GFX11-NEXT: v_lshlrev_b32_e32 v131, 8, v131 +; GFX11-NEXT: s_waitcnt vmcnt(48) +; GFX11-NEXT: v_lshlrev_b32_e32 v113, 8, v94 +; GFX11-NEXT: s_waitcnt vmcnt(47) +; GFX11-NEXT: v_lshlrev_b32_e32 v114, 8, v95 +; GFX11-NEXT: s_waitcnt vmcnt(46) +; GFX11-NEXT: v_lshlrev_b32_e32 v115, 8, v104 +; GFX11-NEXT: s_waitcnt vmcnt(45) +; GFX11-NEXT: v_lshlrev_b32_e32 v116, 8, v105 +; GFX11-NEXT: s_waitcnt vmcnt(44) +; GFX11-NEXT: v_lshlrev_b32_e32 v117, 8, v106 +; GFX11-NEXT: s_waitcnt vmcnt(43) +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 8, v107 +; GFX11-NEXT: s_waitcnt vmcnt(42) +; GFX11-NEXT: v_lshlrev_b32_e32 v96, 8, v108 +; GFX11-NEXT: s_waitcnt vmcnt(41) +; GFX11-NEXT: v_lshlrev_b32_e32 v97, 8, v109 +; GFX11-NEXT: s_waitcnt vmcnt(40) +; GFX11-NEXT: v_lshlrev_b32_e32 v98, 8, v110 +; GFX11-NEXT: s_waitcnt vmcnt(39) +; GFX11-NEXT: v_lshlrev_b32_e32 v99, 8, v111 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB59_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v54 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v53 +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: v_or_b32_e32 v0, v0, v90 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v91 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_and_b32 s11, s26, 0xff +; GFX11-NEXT: v_or_b32_e32 v5, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v50 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v49 +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; 
GFX11-NEXT: v_or_b32_e32 v0, v0, v76 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v77 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v7, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v48 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v39 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v78 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v79 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v8, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v38 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v37 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v88 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v63 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v9, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v72 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v73 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v10, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v33 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v74 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v75 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v62 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v57 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v58 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v56 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v47 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v59 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v60 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v13, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v46 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v45 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v61 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v40 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v14, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v183 +; GFX11-NEXT: 
v_and_b32_e32 v1, 0xff, v182 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v41 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v42 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v15, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v181 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v180 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v43 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v44 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v16, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v166 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v165 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v167 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v176 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v17, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v164 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v163 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v177 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v178 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v18, v0, v1 +; 
GFX11-NEXT: v_and_b32_e32 v0, 0xff, v162 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v148 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v179 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v149 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v19, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v147 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v146 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v150 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v151 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v20, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v145 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v118 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v160 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v161 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v21, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v112 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v103 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v132 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v133 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: 
v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v22, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v102 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v101 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v134 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v135 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v23, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v100 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v86 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v144 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v119 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v24, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v85 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v84 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v128 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v129 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v25, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v83 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v82 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v130 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v131 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v26, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v81 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v80 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v113 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v114 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v27, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v71 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v70 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v115 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v116 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v28, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v69 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v68 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v117 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v87 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v29, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v67 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v66 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v96 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v97 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v30, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v65 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v64 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v98 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v99 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_or_b32_e32 v31, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v55 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v89 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_lshl_b32 s6, s6, 16 +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_and_b32 s9, s9, 0xffff +; 
GFX11-NEXT: s_lshl_b32 s10, s10, 16 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v51 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v52 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_or_b32_e32 v3, v3, v93 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v92 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_or_b32_e32 v6, v2, v3 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB59_3 +; GFX11-NEXT: .LBB59_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s0, s0, s1 +; GFX11-NEXT: s_and_b32 s1, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s2, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_or_b32 s1, s2, s1 +; GFX11-NEXT: s_and_b32 s2, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s19, 8 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_or_b32 s2, s3, s2 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_and_b32 s3, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s1, s1, s2 +; GFX11-NEXT: s_or_b32 s2, 
s4, s3 +; GFX11-NEXT: s_and_b32 s3, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s23, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s3, s4, s3 +; GFX11-NEXT: s_and_b32 s4, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s25, 8 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_addk_i32 s3, 0x300 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s2, s2, 0xffff +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s2, s2, s3 +; GFX11-NEXT: s_and_b32 s3, s4, 0xffff +; GFX11-NEXT: s_and_b32 s4, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s27, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v55 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v54 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v52 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v51 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v38 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v33 +; GFX11-NEXT: v_or_b32_e32 v0, v89, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v90, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v91, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v92, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v93, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v7, v88, v7 +; GFX11-NEXT: 
v_or_b32_e32 v11, v74, v11 +; GFX11-NEXT: v_or_b32_e32 v12, v75, v12 +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_or_b32_e32 v6, v3, v6 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v49 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v48 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v39 +; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v37 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v35 +; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v12 +; GFX11-NEXT: s_waitcnt vmcnt(3) +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v46 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v181 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v180 +; GFX11-NEXT: v_or_b32_e32 v0, v76, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v77, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v78, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v79, v3 +; GFX11-NEXT: v_or_b32_e32 v7, v63, v7 +; GFX11-NEXT: v_or_b32_e32 v8, v72, v8 +; GFX11-NEXT: v_or_b32_e32 v10, v73, v10 +; GFX11-NEXT: v_and_b32_e32 v12, 
0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 +; GFX11-NEXT: v_or_b32_e32 v12, v61, v12 +; GFX11-NEXT: v_or_b32_e32 v16, v43, v16 +; GFX11-NEXT: v_or_b32_e32 v17, v44, v17 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v7 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_or_b32_e32 v7, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v8, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v9, v9, v13 +; GFX11-NEXT: v_or_b32_e32 v10, v14, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v62 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v56 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v47 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v45 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 3, v183 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v182 +; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-NEXT: v_or_b32_e32 v16, v16, v17 +; GFX11-NEXT: 
v_add_nc_u32_e32 v17, 3, v162 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 3, v145 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v118 +; GFX11-NEXT: v_or_b32_e32 v0, v57, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v58, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v59, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v60, v3 +; GFX11-NEXT: v_or_b32_e32 v12, v40, v12 +; GFX11-NEXT: v_or_b32_e32 v13, v41, v13 +; GFX11-NEXT: v_or_b32_e32 v15, v42, v15 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_and_b32_e32 v21, 0xff, v21 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 +; GFX11-NEXT: v_or_b32_e32 v17, v179, v17 +; GFX11-NEXT: v_or_b32_e32 v21, v160, v21 +; GFX11-NEXT: v_or_b32_e32 v22, v161, v22 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v18, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v15, 16, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_or_b32_e32 v12, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v13, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v18 +; GFX11-NEXT: v_or_b32_e32 v15, v19, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v166 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v165 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v164 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v163 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v148 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 3, v147 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v146 +; GFX11-NEXT: 
v_and_b32_e32 v21, 0xffff, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18 +; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20 +; GFX11-NEXT: v_or_b32_e32 v21, v21, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v100 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 3, v83 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v82 +; GFX11-NEXT: v_or_b32_e32 v0, v167, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v176, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v177, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v178, v3 +; GFX11-NEXT: v_or_b32_e32 v17, v149, v17 +; GFX11-NEXT: v_or_b32_e32 v18, v150, v18 +; GFX11-NEXT: v_or_b32_e32 v20, v151, v20 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 +; GFX11-NEXT: v_or_b32_e32 v22, v144, v22 +; GFX11-NEXT: v_or_b32_e32 v26, v130, v26 +; GFX11-NEXT: v_or_b32_e32 v27, v131, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 16, v17 +; GFX11-NEXT: v_and_b32_e32 v24, 0xffff, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_or_b32_e32 v17, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v18, v2, v3 +; GFX11-NEXT: v_or_b32_e32 
v19, v19, v23 +; GFX11-NEXT: v_or_b32_e32 v20, v24, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v112 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v103 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v102 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v101 +; GFX11-NEXT: v_and_b32_e32 v24, 0xffff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v86 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 3, v85 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 3, v84 +; GFX11-NEXT: v_and_b32_e32 v26, 0xffff, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 16, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v23, 0xff, v23 +; GFX11-NEXT: v_and_b32_e32 v25, 0xff, v25 +; GFX11-NEXT: v_or_b32_e32 v26, v26, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v69 +; GFX11-NEXT: v_or_b32_e32 v0, v132, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v133, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v134, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v135, v3 +; GFX11-NEXT: v_or_b32_e32 v22, v119, v22 +; GFX11-NEXT: v_or_b32_e32 v23, v128, v23 +; GFX11-NEXT: v_or_b32_e32 v25, v129, v25 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 +; GFX11-NEXT: v_or_b32_e32 v27, v117, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v28, 16, v22 +; GFX11-NEXT: v_and_b32_e32 v29, 0xffff, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_or_b32_e32 v22, v0, v1 
+; GFX11-NEXT: v_or_b32_e32 v23, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v24, v24, v28 +; GFX11-NEXT: v_or_b32_e32 v25, v29, v25 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v81 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v80 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v71 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v70 +; GFX11-NEXT: v_and_b32_e32 v29, 0xffff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v68 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 3, v67 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 3, v66 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 3, v65 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 3, v64 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28 +; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30 +; GFX11-NEXT: v_and_b32_e32 v31, 0xff, v31 +; GFX11-NEXT: v_and_b32_e32 v32, 0xff, v32 +; GFX11-NEXT: v_or_b32_e32 v0, v113, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v114, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v115, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v116, v3 +; GFX11-NEXT: v_or_b32_e32 v27, v87, v27 +; GFX11-NEXT: v_or_b32_e32 v28, v96, v28 +; GFX11-NEXT: v_or_b32_e32 v30, v97, v30 +; GFX11-NEXT: v_or_b32_e32 v31, v98, v31 +; GFX11-NEXT: v_or_b32_e32 v32, v99, v32 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v33, 16, v27 +; GFX11-NEXT: v_and_b32_e32 
v34, 0xffff, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v32, 16, v32 +; GFX11-NEXT: v_or_b32_e32 v27, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v28, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v29, v29, v33 +; GFX11-NEXT: v_or_b32_e32 v30, v34, v30 +; GFX11-NEXT: v_or_b32_e32 v31, v31, v32 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB59_3: ; %end +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:320 +; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:324 +; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:328 +; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:332 +; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:336 +; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:340 +; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:344 +; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:348 +; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:352 +; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:356 +; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:360 +; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:364 +; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:368 +; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:372 +; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:376 +; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:380 +; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:384 +; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:388 +; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:392 +; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:396 +; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:400 +; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:404 +; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:408 +; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:412 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:416 +; GFX11-NEXT: 
scratch_load_b32 v62, off, s32 offset:420 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:424 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:428 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:432 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:436 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:440 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:444 +; GFX11-NEXT: s_clause 0x7 +; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:448 +; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:452 +; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:456 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:460 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:464 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:468 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:472 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:476 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB59_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-NEXT: s_branch .LBB59_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -121573,876 +118984,913 @@ define <16 x double> @bitcast_v128i8_to_v16f64(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 
v51, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v51, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v52, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v53, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v54, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v54, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v55, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 offset:260 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:536 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:532 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:528 +; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v43, s32 offset:524 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:412 +; GFX11-TRUE16-NEXT: s_clause 0x4 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, 
s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:392 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v49, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v114, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v81, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v83, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v83, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v85, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v87, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v87, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v97, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v98, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v100, off, s32 offset:96 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v101, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v102, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v160, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v160, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v161, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v161, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:152 +; GFX11-TRUE16-NEXT: scratch_load_u16 v31, off, s32 offset:384 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:380 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 offset:376 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:372 +; GFX11-TRUE16-NEXT: scratch_load_u16 v56, off, s32 offset:368 +; GFX11-TRUE16-NEXT: scratch_load_u16 v114, off, s32 offset:364 +; GFX11-TRUE16-NEXT: scratch_load_u16 v33, off, s32 offset:360 +; GFX11-TRUE16-NEXT: scratch_load_u16 v115, off, s32 offset:356 +; GFX11-TRUE16-NEXT: scratch_load_u16 v57, off, s32 offset:352 +; GFX11-TRUE16-NEXT: scratch_load_u16 v116, off, s32 offset:348 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:344 +; GFX11-TRUE16-NEXT: scratch_load_u16 v117, off, s32 offset:340 +; GFX11-TRUE16-NEXT: scratch_load_u16 v58, off, s32 offset:336 +; GFX11-TRUE16-NEXT: scratch_load_u16 v118, off, s32 offset:332 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:328 +; GFX11-TRUE16-NEXT: scratch_load_u16 v119, off, s32 offset:324 +; GFX11-TRUE16-NEXT: scratch_load_u16 v59, off, s32 offset:320 +; GFX11-TRUE16-NEXT: scratch_load_u16 v128, off, s32 offset:316 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:312 +; GFX11-TRUE16-NEXT: scratch_load_u16 v129, off, s32 offset:308 +; GFX11-TRUE16-NEXT: scratch_load_u16 v60, off, s32 offset:304 +; GFX11-TRUE16-NEXT: scratch_load_u16 v130, off, s32 offset:300 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:296 +; GFX11-TRUE16-NEXT: scratch_load_u16 v131, 
off, s32 offset:292 +; GFX11-TRUE16-NEXT: scratch_load_u16 v61, off, s32 offset:288 +; GFX11-TRUE16-NEXT: scratch_load_u16 v132, off, s32 offset:284 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:280 +; GFX11-TRUE16-NEXT: scratch_load_u16 v133, off, s32 offset:276 +; GFX11-TRUE16-NEXT: scratch_load_u16 v62, off, s32 offset:272 +; GFX11-TRUE16-NEXT: scratch_load_u16 v134, off, s32 offset:268 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:264 +; GFX11-TRUE16-NEXT: scratch_load_u16 v135, off, s32 offset:260 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v162, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v163, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v164, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v165, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v55, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v65, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v80, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v81, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v82, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v84, off, s32 offset:116 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v86, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v86, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v96, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v99, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v101, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v103, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v112, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v113, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v63, off, s32 offset:256 +; GFX11-TRUE16-NEXT: scratch_load_u16 v144, off, s32 offset:252 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:248 +; GFX11-TRUE16-NEXT: scratch_load_u16 v145, off, s32 offset:244 +; GFX11-TRUE16-NEXT: scratch_load_u16 v72, off, s32 offset:240 +; GFX11-TRUE16-NEXT: scratch_load_u16 v146, off, s32 offset:236 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:232 +; GFX11-TRUE16-NEXT: scratch_load_u16 v147, off, s32 offset:228 +; GFX11-TRUE16-NEXT: scratch_load_u16 v73, off, s32 offset:224 +; GFX11-TRUE16-NEXT: scratch_load_u16 v148, off, s32 offset:220 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:216 +; GFX11-TRUE16-NEXT: scratch_load_b32 v51, off, s32 offset:388 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, 
s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v74, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v75, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v76, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v77, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_u16 v78, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v79, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v88, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v89, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v90, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v91, off, s32 offset:136 +; GFX11-TRUE16-NEXT: scratch_load_u16 v92, off, s32 offset:144 +; GFX11-TRUE16-NEXT: scratch_load_u16 v93, off, s32 offset:152 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_u16 v94, off, s32 offset:160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v95, off, s32 offset:168 +; GFX11-TRUE16-NEXT: scratch_load_u16 v104, off, s32 offset:176 +; GFX11-TRUE16-NEXT: scratch_load_u16 v105, off, s32 offset:184 +; GFX11-TRUE16-NEXT: scratch_load_u16 v106, off, s32 offset:192 +; GFX11-TRUE16-NEXT: scratch_load_u16 v107, off, s32 offset:200 +; GFX11-TRUE16-NEXT: scratch_load_u16 v108, off, s32 offset:208 +; GFX11-TRUE16-NEXT: scratch_load_u16 v149, off, s32 offset:212 +; GFX11-TRUE16-NEXT: scratch_load_u16 v150, off, s32 offset:204 +; GFX11-TRUE16-NEXT: scratch_load_u16 v151, off, s32 offset:196 +; GFX11-TRUE16-NEXT: scratch_load_u16 v160, off, s32 offset:188 +; GFX11-TRUE16-NEXT: scratch_load_u16 v161, off, s32 offset:180 +; GFX11-TRUE16-NEXT: scratch_load_u16 v162, off, s32 offset:172 +; GFX11-TRUE16-NEXT: scratch_load_u16 v163, off, s32 offset:164 +; GFX11-TRUE16-NEXT: scratch_load_u16 v164, off, s32 offset:156 +; GFX11-TRUE16-NEXT: scratch_load_u16 v165, off, s32 offset:148 +; GFX11-TRUE16-NEXT: scratch_load_u16 v166, off, s32 offset:140 +; 
GFX11-TRUE16-NEXT: scratch_load_u16 v167, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v176, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v177, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v178, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v179, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v180, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v181, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v182, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v183, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v40, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v41, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v42, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v43, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v44, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v45, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v115, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v117.l, v30.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v118.h, v28.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v119.l, v26.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v119.h, v24.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v130.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v130.h, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v131.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v132.h, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.l, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.h, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v144.l, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v145.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v146.h, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v146.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.l, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.l, 8, v1.l -; GFX11-TRUE16-NEXT: 
v_lshlrev_b16 v151.h, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.h, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.l, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.h, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.l, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v145.l, 8, v19.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.l, 8, v21.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.h, 8, v23.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.l, 8, v25.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.h, 8, v27.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.h, 8, v29.l +; GFX11-TRUE16-NEXT: scratch_load_u16 v46, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v47, off, s32 offset:4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.l, v30.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v70.h, v28.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v71.l, v26.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v71.h, v24.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v82.l, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v82.h, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v83.l, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v84.h, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v86.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v86.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v96.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v97.h, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v98.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v98.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.l, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v13.l +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v19.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v21.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v23.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v25.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v27.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v29.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v31.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v32.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v56.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v33.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v57.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v34.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v58.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v35.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.h, 8, v59.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.l, 8, v36.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.h, 8, v60.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v37.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v61.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v38.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v62.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, v63.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v48.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.h, 8, v72.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v49.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v73.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v50.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: 
v_cmp_ne_u32_e32 vcc_lo, 0, v114 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v51 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.l, 8, v81.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.l, 8, v52.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.l, 8, v82.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v53.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.h, 8, v83.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v54.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.l, 8, v83.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.l, 8, v55.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.h, 8, v85.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.h, 8, v64.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.h, 8, v85.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.h, 8, v65.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.l, 8, v87.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v66.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.l, 8, v87.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v67.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.h, 8, v97.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.h, 8, v74.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.l, 8, v97.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.l, 8, v75.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.h, 8, v98.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v76.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.h, 8, v99.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v77.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.l, 8, v100.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v78.l ; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v101.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.l, 8, v79.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v102.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.h, 8, v88.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(38) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v160.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v89.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.l, 8, v160.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v90.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(36) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v161.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v91.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v161.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v92.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(34) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v162.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v93.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v162.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v94.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v163.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v95.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v163.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v104.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(30) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v164.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v105.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v164.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v106.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(28) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v165.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v107.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v165.h 
-; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v71.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.l, 8, v71.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.l, 8, v70.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.h, 8, v70.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v68.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.h, 8, v67.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v66.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v66.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v64.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v64.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v55.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v54.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v54.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v53.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v53.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v52.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v52.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v51.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v51.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v50.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v31.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v108.l ; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB74_3 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow -; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB74_4 -; GFX11-TRUE16-NEXT: .LBB74_2: ; %end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB74_3: ; 
%cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v149.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v149.h, 0 -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v146.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v146.l -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v0.l, v151.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v151.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v0.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v150.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v3.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v145.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v144.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v149, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v1.h, v150.l -; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v132.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v131.l -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v148.h -; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v130.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v149, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v2.l, v148.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v2.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v134.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v134.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v145.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v130.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v149, v2 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v3.l, v147.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v147.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v3.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v135.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v119.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v119.l -; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v118.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v149, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v4.l, v144.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v4.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v133.h -; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v117.l +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB74_2 +; GFX11-TRUE16-NEXT: ; %bb.1: ; 
%cmp.false +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v101.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v101.h, 0 +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v98.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v98.l +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v0.l, v103.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v103.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v102.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v97.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v96.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v101, v0 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v1.h, v102.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v84.h +; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v83.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v100.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v82.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v101, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v2.l, v100.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v86.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v86.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v97.l +; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v82.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v101, v2 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v3.l, v99.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v99.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v87.h +; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v71.h +; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v70.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v101, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v4.l, v96.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v85.h +; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v69.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v115.h -; 
GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v115.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v149, v4 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v5.l, v135.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v5.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v132.l -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v129.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v113.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v112.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v149, v5 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v6.l, v133.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v6.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v103.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v128.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v103.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v101.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v149, v6 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v7.l, v131.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v7.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v118.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v100.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v99.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v98.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v149, v7 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v8.l, v129.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v8.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v116.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v114.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v96.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v96.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v149, v8 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v9.l, v128.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v9.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v86.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v113.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v86.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v84.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v149, v9 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v10.l, v117.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v10.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 
v14.h, v14.h, v102.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v84.l -; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v82.h -; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v81.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v149, v10 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v11.l, v116.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v11.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v101.l -; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v99.l -; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v80.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v80.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v149, v11 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v12.l, v114.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v12.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v69.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v97.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v69.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v68.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v149, v12 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v13.l, v112.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v13.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v87.h -; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v67.l -; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v65.h -; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v65.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v149, v13 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v14.l, v102.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v14.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v85.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v83.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v50.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v149, v14 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v15.l, v100.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v15.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v82.l -; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, 
v149, v15 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v16.l, v98.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v16.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v71.h -; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v48.l -; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v39.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v149, v16 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v17.l, v97.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v17.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v70.h -; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v68.l -; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v18.l, v87.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v18.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v66.h -; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v149, v18 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v19.l, v85.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v19.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v149, v19 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v20.l, v83.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v20.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v55.l -; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v54.l -; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v149, v20 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v21.l, v81.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v21.l, v149.h -; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v53.l -; 
GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v149, v21 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v22.l, v71.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v22.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v52.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v47.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v46.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v101, v4 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v5.l, v87.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v84.l +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v81.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v45.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v44.l +; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v101, v5 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v6.l, v85.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v43.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v42.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v41.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v101, v6 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v7.l, v83.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v70.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v40.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v183.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v182.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v101, v7 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v8.l, v81.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v68.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v67.h +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v181.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v180.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v101, v8 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v9.l, v80.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v179.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v66.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v178.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v177.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v101, v9 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v10.l, v69.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v65.h +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v176.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v167.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v166.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v101, v10 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v11.l, v68.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v64.h +; GFX11-TRUE16-NEXT: 
v_or_b16 v16.h, v16.h, v55.h +; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v165.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v164.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v101, v11 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v12.l, v67.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v163.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v54.h +; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v162.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v161.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v101, v12 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v13.l, v66.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v53.h +; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v160.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v151.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v150.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v101, v13 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v14.l, v65.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v52.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v51.h +; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v149.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v148.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v101, v14 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v15.l, v64.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v147.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v50.h +; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v146.l +; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v145.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v101, v15 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v16.l, v55.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v49.h +; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v144.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v135.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v134.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v101, v16 +; GFX11-TRUE16-NEXT: 
v_or_b16 v101.l, v17.l, v54.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v48.h +; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v133.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v132.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v101, v17 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v18.l, v53.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v131.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v38.h +; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v130.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v129.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v101, v18 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v19.l, v52.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v37.h +; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v128.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v119.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v118.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v101, v19 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v20.l, v51.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v35.h +; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v117.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v116.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v101, v20 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v21.l, v50.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v101.h +; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v115.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v34.h +; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v114.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v113.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v101, v21 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v22.l, v49.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v112.l ; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr98_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr116 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr160 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr161 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr162 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr163 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr164 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr165 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr167 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr176 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr177 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr178 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr179 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr180 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr181 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr182 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr183 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr40 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr41 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr42 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr43 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr44 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr46 +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr47 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr64_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v149, v22 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v23.l, v70.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v23.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v51.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v101, v22 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v23.l, v48.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v32.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v149, v23 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v24.l, v67.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v24.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v149, v24 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v25.l, v66.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v25.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v101, v23 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v24.l, v39.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v101, v24 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v25.l, v38.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v149, v25 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v26.l, v64.l -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v26.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v149, v26 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v27.l, v54.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v27.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v101, v25 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v26.l, v37.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v101, v26 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v27.l, v36.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v149, v27 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v28.l, v53.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v28.l, 
v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v149, v28 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v29.l, v52.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v29.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v101, v27 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v28.l, v35.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v101, v28 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v29.l, v34.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v149, v29 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v30.l, v51.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v30.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v149, v30 -; GFX11-TRUE16-NEXT: v_or_b16 v149.l, v31.l, v50.h -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v31.l, v149.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v101, v29 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v30.l, v33.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v101, v30 +; GFX11-TRUE16-NEXT: v_or_b16 v101.l, v31.l, v32.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v101.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v149, v31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_lo16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v101, v31 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 +; GFX11-TRUE16-NEXT: .LBB74_2: ; %Flow ; GFX11-TRUE16-NEXT: 
s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB74_2 -; GFX11-TRUE16-NEXT: .LBB74_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v149.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v149.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v146.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v146.l, 3 +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB74_4 +; GFX11-TRUE16-NEXT: ; %bb.3: ; %cmp.true +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v101.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v101.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v98.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v98.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v151.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v151.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v150.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v150.l, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v145.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v103.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v103.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v102.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v102.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v97.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v144.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v96.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v134.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v86.h, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v31, v3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, 
v134.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v86.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v148.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v100.l, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v148.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v100.h, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v31, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v31.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v3.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v147.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v147.h, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v132.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v131.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v99.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v99.h, v2.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v84.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v83.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v31, v5 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v4.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v130.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v130.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v82.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v82.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v144.h, v3.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v145.l, v3.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v96.h, v3.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v97.l, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v4.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v31, v6 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v31.h -; 
GFX11-TRUE16-NEXT: v_or_b16 v5.l, v135.l, v4.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v135.h, v4.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v119.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v119.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v87.l, v4.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v87.h, v4.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v71.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v71.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v31, v7 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v6.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v118.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v70.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v69.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v133.l, v5.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v133.h, v5.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v85.l, v5.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v85.h, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v6.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v31, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v131.h, v6.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v132.l, v6.h +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v83.h, v6.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v84.l, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v115.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v47.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v46.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v31, v9 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: 
v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v113.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v45.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v44.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v129.l, v7.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v129.h, v7.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v81.l, v7.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v81.h, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v8.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v31, v10 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v128.l, v8.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v128.h, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v103.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v103.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v80.l, v8.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v80.h, v8.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v43.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v42.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v31, v11 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v101.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v100.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v41.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v40.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v117.h, v9.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v118.l, v9.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v69.h, v9.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v70.l, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v10.l ; 
GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v10.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v31, v12 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v116.l, v10.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v116.h, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v99.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v98.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v68.l, v10.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v68.h, v10.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v183.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v182.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v31, v13 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v96.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v96.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v181.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v180.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v114.l, v11.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v114.h, v11.h +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v67.l, v11.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v67.h, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v12.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v31, v14 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v112.h, v12.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v113.l, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v86.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v86.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v66.l, v12.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v66.h, v12.h +; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v179.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v178.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v31, v15 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v84.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v84.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v177.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v176.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v102.l, v13.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v102.h, v13.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v65.l, v13.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v65.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v14.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v31, v16 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v15.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v100.h, v14.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v101.l, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v82.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v81.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v64.l, v14.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v64.h, v14.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v167.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v166.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v31, v17 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v15.h ; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v16.l ; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v16.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v80.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v80.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, v165.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, v164.l, 3 ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v98.h, v15.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v99.l, v15.h +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v55.l, v15.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v55.h, v15.h ; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v16.l ; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v16.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v31, v18 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v17.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.h, 0x300, v17.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v97.l, v16.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v97.h, v16.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v69.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v69.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v54.l, v16.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v54.h, v16.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v163.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v162.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v31, v19 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v17.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, 0x300, v17.h ; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v18.l ; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v18.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v68.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, v161.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, v160.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v87.l, v17.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v87.h, v17.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v53.l, v17.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v53.h, v17.h ; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v18.l ; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v18.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v31, v20 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v19.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.h, 0x300, v19.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v31.h -; 
GFX11-TRUE16-NEXT: v_or_b16 v19.l, v85.l, v18.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v85.h, v18.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v65.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v65.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v52.l, v18.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v52.h, v18.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v151.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v150.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v31, v21 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v19.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, 0x300, v19.h ; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v20.l ; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v20.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v50.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, v149.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, v148.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v83.l, v19.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v83.h, v19.h +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v51.l, v19.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v51.h, v19.h ; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v20.l ; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v20.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v31, v22 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v21.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.h, 0x300, v21.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v81.h, v20.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v82.l, v20.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v50.l, v20.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v50.h, v20.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v147.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v146.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v31, v23 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v21.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, 0x300, v21.h ; 
GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v22.l ; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v22.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, v145.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, v144.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v71.l, v21.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v71.h, v21.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v49.l, v21.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v49.h, v21.h ; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v22.l ; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v22.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v31, v24 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v23.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.h, 0x300, v23.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v70.l, v22.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v70.h, v22.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v39.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v39.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v48.l, v22.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v48.h, v22.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v135.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v134.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v31, v25 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v23.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, 0x300, v23.h ; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v24.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, v133.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, v132.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v67.h, v23.l -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v68.l, v23.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v39.l, v23.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v39.h, v23.h ; 
GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v24.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v31, v26 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v25.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.h, 0x300, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v66.l, v24.l -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v66.h, v24.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v37.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v37.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v38.l, v24.l +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v38.h, v24.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v131.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v130.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v31, v27 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v25.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, 0x300, v25.h ; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v26.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v36.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, v129.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, v128.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v64.l, v25.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v64.h, v25.h +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v37.l, v25.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v37.h, v25.h ; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v26.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v31, v28 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v27.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.h, 0x300, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v54.h, v26.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v55.l, v26.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v35.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v36.l, v26.l +; 
GFX11-TRUE16-NEXT: v_or_b16 v27.h, v36.h, v26.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v119.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v118.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v31, v29 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v27.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, 0x300, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v28.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v34.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, v116.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v53.h, v27.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v54.l, v27.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v35.l, v27.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v35.h, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v28.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v31, v30 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v29.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v34.h, 0x300, v29.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v52.h, v28.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v53.l, v28.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v33.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v31, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v35.h, 0x300, v29.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v34.l, v28.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v34.h, v28.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v114.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v31, v35 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v29.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v29.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v34.h, 0x300, v29.h ; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v30.l ; 
GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v30.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v31.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v51.h, v29.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v52.l, v29.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, v113.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v33.l, v29.l +; GFX11-TRUE16-NEXT: v_or_b16 v33.h, v33.h, v29.h ; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v30.l ; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v30.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v31, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v32.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v32.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v50.h, v30.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v51.l, v30.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v31, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v33.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v33.h, 0x300, v33.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v32.l, v30.l +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v32.h, v30.h ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v31, v33 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v32.l @@ -122450,7 +119898,48 @@ define <16 x double> @bitcast_v128i8_to_v16f64(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_add_nc_u16 v32.h, 0x300, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v31.h ; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v32 +; GFX11-TRUE16-NEXT: .LBB74_4: ; %end ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:392 +; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:400 +; 
GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:516 +; GFX11-TRUE16-NEXT: s_clause 0x4 +; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_load_b32 v43, 
off, s32 offset:524 +; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:528 +; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:532 +; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:536 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v16f64: @@ -126716,1887 +124205,946 @@ define inreg <16 x double> @bitcast_v128i8_to_v16f64_scalar(<128 x i8> inreg %a, ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: bitcast_v128i8_to_v16f64_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 
offset:400 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 -; GFX11-TRUE16-NEXT: s_clause 0x7 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: 
scratch_load_b32 v0, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v2, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v16, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v18, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v20, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v22, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v24, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v26, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v28, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v30, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v87, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v113, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v116, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v117, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v128, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v130, off, s32 offset:224 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v94, off, s32 offset:240 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v95, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v104, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v105, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v106, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v107, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v108, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v109, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v110, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v111, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v81, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v83, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v102, 
off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v112, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v146, off, s32 offset:132 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v147, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v148, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v166, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v180, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v181, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v182, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v183, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v45, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v46, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v47, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v56, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v62, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v89, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v90, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v91, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v92, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v93, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v76, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v77, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v78, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v79, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v88, 8, 
v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v63, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v72, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v73, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v74, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v75, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v57, 8, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v58, 8, v4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v59, 8, v6 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v60, 8, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v61, 8, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v40, 8, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v41, 8, v14 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v42, 8, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v43, 8, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v44, 8, v20 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v167, 8, v22 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v176, 8, v24 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v177, 8, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v178, 8, v28 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v179, 8, v30 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v149, 8, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v150, 8, v87 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v151, 8, v96 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v160, 8, v97 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v161, 8, v98 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v132, 8, v99 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v133, 8, v113 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v134, 8, v114 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v135, 8, v115 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; 
GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v144, 8, v116 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v119, 8, v117 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v128, 8, v128 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v129, 8, v129 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v130, 8, v130 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v131, 8, v131 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v113, 8, v94 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v114, 8, v95 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v115, 8, v104 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v116, 8, v105 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v117, 8, v106 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 8, v107 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v96, 8, v108 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v97, 8, v109 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v98, 8, v110 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v99, 8, v111 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB75_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v54 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v53 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v90 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v91 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; 
GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v50 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v49 -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v76 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v77 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v39 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v88 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v63 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v72 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v73 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v33 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v74 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v75 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v62 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v57 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v58 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 
v0, 0xff, v56 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v47 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v60 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v46 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v45 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v61 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v41 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v42 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v43 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v166 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v165 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v167 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v176 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v164 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v163 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v162 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v148 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v179 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v149 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v147 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v150 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v151 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v160 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v161 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v112 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v103 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v132 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v133 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v102 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v101 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v0, v0, v134 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v135 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v144 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v119 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v85 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v84 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v128 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v129 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v130 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v81 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v80 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v113 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v114 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v115 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v116 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v68 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v117 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v87 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v67 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v66 
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v96 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v97 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v65 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v98 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v99 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v0, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v55 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 
s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s9, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s10, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v51 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v52 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v93 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v92 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v2, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB75_3 -; GFX11-TRUE16-NEXT: .LBB75_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; 
GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v55 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v54 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v52 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, 
v51 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v38 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v89, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v90, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v92, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v93, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v88, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v74, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v75, v12 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: 
s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v3, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v49 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v48 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v39 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v37 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 3, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v46 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 3, v181 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v77, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v78, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v79, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v63, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v72, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v73, v10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v8, 0x300, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v61, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v43, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v44, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v9, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v14, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v32 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v62 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v56 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v45 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 3, v183 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 3, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v13 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v16, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v162 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 3, 
v145 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v118 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v57, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v58, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v59, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v60, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v40, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v41, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v42, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xff, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v179, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v160, v21 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v161, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v18, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v18 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v19, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v166 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v165 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v164 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v163 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 3, v148 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 3, v147 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v146 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xff, v20 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v21, v21, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v100 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 3, v83 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v167, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v176, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v177, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v178, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v150, v18 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v151, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v144, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v130, v26 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v131, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v19, v19, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v24, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v112 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v103 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v102 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v101 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 3, v86 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 3, v85 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 3, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v23, 0xff, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v25, 0xff, v25 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v26, v26, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v69 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v132, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v133, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v134, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v135, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v119, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v128, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v129, v25 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v117, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xffff, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v23, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v24, v24, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v25, v29, v25 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v81 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v80 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v71 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v70 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v29, 0xffff, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 3, v68 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 3, v67 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 3, v66 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 3, v65 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 3, v64 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v28, 0xff, v28 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v30, 0xff, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xff, v31 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xff, v32 -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v0, v113, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v114, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v115, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v116, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v87, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v96, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v97, v30 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v98, v31 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v32, v99, v32 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v33, 16, v27 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v28 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v32, 16, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v28, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v29, v29, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v30, v34, v30 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v31, v31, v32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB75_3: ; %end -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, 
off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:444 -; GFX11-TRUE16-NEXT: s_clause 0x7 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:448 -; 
GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:452 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:456 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:460 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:464 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:468 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:472 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:476 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB75_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: s_branch .LBB75_2 -; -; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v16f64_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, 
s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 -; GFX11-FAKE16-NEXT: s_clause 0x7 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-FAKE16-NEXT: 
v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-FAKE16-NEXT: scratch_load_u16 v2, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v16, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v18, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_u16 v20, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_u16 v22, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_u16 v24, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_u16 v26, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_u16 v28, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_u16 v30, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_u16 v31, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_u16 v87, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_u16 v96, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_u16 v97, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_u16 v98, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_u16 v99, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_u16 v113, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_u16 v114, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_u16 v115, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_u16 v116, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_u16 v117, off, s32 
offset:200 -; GFX11-FAKE16-NEXT: scratch_load_u16 v128, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_u16 v129, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_u16 v130, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_u16 v131, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_u16 v94, off, s32 offset:240 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_u16 v95, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_u16 v104, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_u16 v105, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_u16 v106, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_u16 v107, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_u16 v108, off, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_load_u16 v109, off, s32 offset:296 -; GFX11-FAKE16-NEXT: scratch_load_u16 v110, off, s32 offset:304 -; GFX11-FAKE16-NEXT: scratch_load_u16 v111, off, s32 offset:312 -; GFX11-FAKE16-NEXT: scratch_load_u16 v64, off, s32 offset:308 -; GFX11-FAKE16-NEXT: scratch_load_u16 v65, off, s32 offset:300 -; GFX11-FAKE16-NEXT: scratch_load_u16 v66, off, s32 offset:292 -; GFX11-FAKE16-NEXT: scratch_load_u16 v67, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_u16 v68, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_u16 v69, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_u16 v70, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_u16 v71, off, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_load_u16 v80, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_u16 v81, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_u16 v82, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_u16 v83, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_u16 v84, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_u16 v85, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_u16 v86, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_u16 v100, off, s32 offset:188 -; 
GFX11-FAKE16-NEXT: scratch_load_u16 v101, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_u16 v102, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_u16 v103, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_u16 v112, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_u16 v118, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_u16 v145, off, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_load_u16 v146, off, s32 offset:132 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v147, off, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_load_u16 v148, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_u16 v162, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_u16 v163, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_u16 v164, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_u16 v165, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_u16 v166, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_u16 v180, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_u16 v181, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v182, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_u16 v183, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v45, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v46, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v47, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v56, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v62, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v89, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v90, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v91, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v92, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v93, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v76, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v77, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v78, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v79, 8, v17 -; GFX11-FAKE16-NEXT: 
v_lshlrev_b32_e32 v88, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v63, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v72, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v73, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v74, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v75, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v57, 8, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v58, 8, v4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v59, 8, v6 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v60, 8, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v61, 8, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v40, 8, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v41, 8, v14 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v42, 8, v16 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v43, 8, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v44, 8, v20 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v167, 8, v22 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v176, 8, v24 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v177, 8, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v178, 8, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v179, 8, v30 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v149, 8, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v150, 8, v87 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v151, 8, v96 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v160, 8, v97 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v161, 8, v98 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 8, v99 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v133, 8, v113 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 8, v114 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v135, 8, v115 -; GFX11-FAKE16-NEXT: 
s_waitcnt vmcnt(54) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v144, 8, v116 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v119, 8, v117 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v128, 8, v128 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v129, 8, v129 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v130, 8, v130 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v131, 8, v131 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v113, 8, v94 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v114, 8, v95 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v115, 8, v104 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v116, 8, v105 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v117, 8, v106 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 8, v107 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v96, 8, v108 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 8, v109 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 8, v110 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v99, 8, v111 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB75_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v54 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v53 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v90 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v91 -; GFX11-FAKE16-NEXT: 
s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v49 -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v76 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v77 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v88 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v63 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v72 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v73 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v33 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v74 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v75 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v62 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v57 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v58 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v0, v1 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v56 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v47 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v60 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v46 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v45 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v61 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v40 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v41 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v42 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v43 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v166 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v165 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v167 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v176 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v164 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v163 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v177 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v178 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v162 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v148 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v179 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v149 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v147 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v150 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v151 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v160 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v161 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v112 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v103 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v132 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v133 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v102 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v101 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v134 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v135 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v144 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v119 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v24, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v85 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v84 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v128 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v129 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v130 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) 
| instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v81 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v80 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v113 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v114 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v115 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v116 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v68 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v117 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v87 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v67 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, 
v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v96 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v97 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v98 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v99 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v0, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v55 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: 
s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s9, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s10, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v51 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v52 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v93 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v92 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v2, v3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB75_3 -; GFX11-FAKE16-NEXT: .LBB75_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; 
GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v55 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v54 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v52 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, 
v51 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v38 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v89, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v90, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v92, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v93, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v88, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v74, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v75, v12 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: 
s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v3, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v49 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v48 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v39 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 3, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v46 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 3, v181 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v77, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v78, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v79, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v63, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v72, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v73, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v8, 0x300, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v61, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v43, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v44, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v9, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v14, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v32 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v62 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v56 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v45 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 3, v183 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 3, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v16, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v162 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 3, 
v145 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v118 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v57, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v58, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v59, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v60, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v40, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v41, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v42, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xff, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v179, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v160, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v161, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v18, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v18 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v19, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v166 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v165 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v164 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v163 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 3, v148 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 3, v147 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v146 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xff, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, v21, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v100 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 3, v83 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v167, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v176, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v177, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v178, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v149, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v150, v18 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v151, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v144, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v130, v26 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v131, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, v19, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v24, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v112 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v103 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v102 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v101 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 3, v86 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 3, v85 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 3, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xff, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xff, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, v26, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v69 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v132, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v133, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v134, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v135, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v119, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v128, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v129, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v117, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v24, v24, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, v29, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v81 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v80 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v71 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v70 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 3, v68 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 3, v67 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 3, v66 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 3, v65 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 3, v64 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xff, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xff, v28 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xff, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xff, v31 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xff, v32 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v0, v113, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v114, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v115, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v116, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v87, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v96, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v97, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v98, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, v99, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v33, 16, v27 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, v29, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v30, v34, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, v31, v32 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB75_3: ; %end -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v111, off, s32 offset:320 -; GFX11-FAKE16-NEXT: scratch_load_b32 v110, off, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_load_b32 v109, 
off, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_load_b32 v108, off, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_load_b32 v107, off, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_load_b32 v106, off, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_load_b32 v105, off, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_load_b32 v104, off, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_load_b32 v95, off, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:444 -; GFX11-FAKE16-NEXT: s_clause 0x7 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:448 -; 
GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:452 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:456 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:460 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:464 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:468 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:472 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:476 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB75_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: s_branch .LBB75_2 +; GFX11-LABEL: bitcast_v128i8_to_v16f64_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; 
GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:352 +; GFX11-NEXT: s_clause 0x7 +; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:320 +; GFX11-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 +; GFX11-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 +; GFX11-NEXT: v_dual_mov_b32 v36, v22 :: v_dual_mov_b32 v37, v20 +; GFX11-NEXT: v_dual_mov_b32 v38, v18 :: v_dual_mov_b32 v39, v16 +; GFX11-NEXT: v_dual_mov_b32 v48, v14 :: v_dual_mov_b32 v49, v12 +; GFX11-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v8 +; GFX11-NEXT: v_dual_mov_b32 v52, v6 :: v_dual_mov_b32 v53, v4 +; GFX11-NEXT: v_dual_mov_b32 v54, v2 :: v_dual_mov_b32 v55, v0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:316 +; GFX11-NEXT: 
scratch_load_u16 v2, off, s32 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v16, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v18, off, s32 offset:64 +; GFX11-NEXT: scratch_load_u16 v20, off, s32 offset:72 +; GFX11-NEXT: scratch_load_u16 v22, off, s32 offset:80 +; GFX11-NEXT: scratch_load_u16 v24, off, s32 offset:88 +; GFX11-NEXT: scratch_load_u16 v26, off, s32 offset:96 +; GFX11-NEXT: scratch_load_u16 v28, off, s32 offset:104 +; GFX11-NEXT: scratch_load_u16 v30, off, s32 offset:112 +; GFX11-NEXT: scratch_load_u16 v31, off, s32 offset:120 +; GFX11-NEXT: scratch_load_u16 v87, off, s32 offset:128 +; GFX11-NEXT: scratch_load_u16 v96, off, s32 offset:136 +; GFX11-NEXT: scratch_load_u16 v97, off, s32 offset:144 +; GFX11-NEXT: scratch_load_u16 v98, off, s32 offset:152 +; GFX11-NEXT: scratch_load_u16 v99, off, s32 offset:160 +; GFX11-NEXT: scratch_load_u16 v113, off, s32 offset:168 +; GFX11-NEXT: scratch_load_u16 v114, off, s32 offset:176 +; GFX11-NEXT: scratch_load_u16 v115, off, s32 offset:184 +; GFX11-NEXT: scratch_load_u16 v116, off, s32 offset:192 +; GFX11-NEXT: scratch_load_u16 v117, off, s32 offset:200 +; GFX11-NEXT: scratch_load_u16 v128, off, s32 offset:208 +; GFX11-NEXT: scratch_load_u16 v129, off, s32 offset:216 +; GFX11-NEXT: scratch_load_u16 v130, off, s32 offset:224 +; GFX11-NEXT: scratch_load_u16 v131, off, s32 offset:232 +; GFX11-NEXT: scratch_load_u16 v94, off, s32 offset:240 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_u16 v95, off, s32 offset:248 +; GFX11-NEXT: scratch_load_u16 v104, off, s32 offset:256 +; GFX11-NEXT: scratch_load_u16 v105, off, s32 offset:264 +; GFX11-NEXT: scratch_load_u16 v106, off, s32 offset:272 +; 
GFX11-NEXT: scratch_load_u16 v107, off, s32 offset:280 +; GFX11-NEXT: scratch_load_u16 v108, off, s32 offset:288 +; GFX11-NEXT: scratch_load_u16 v109, off, s32 offset:296 +; GFX11-NEXT: scratch_load_u16 v110, off, s32 offset:304 +; GFX11-NEXT: scratch_load_u16 v111, off, s32 offset:312 +; GFX11-NEXT: scratch_load_u16 v64, off, s32 offset:308 +; GFX11-NEXT: scratch_load_u16 v65, off, s32 offset:300 +; GFX11-NEXT: scratch_load_u16 v66, off, s32 offset:292 +; GFX11-NEXT: scratch_load_u16 v67, off, s32 offset:284 +; GFX11-NEXT: scratch_load_u16 v68, off, s32 offset:276 +; GFX11-NEXT: scratch_load_u16 v69, off, s32 offset:268 +; GFX11-NEXT: scratch_load_u16 v70, off, s32 offset:260 +; GFX11-NEXT: scratch_load_u16 v71, off, s32 offset:252 +; GFX11-NEXT: scratch_load_u16 v80, off, s32 offset:244 +; GFX11-NEXT: scratch_load_u16 v81, off, s32 offset:236 +; GFX11-NEXT: scratch_load_u16 v82, off, s32 offset:228 +; GFX11-NEXT: scratch_load_u16 v83, off, s32 offset:220 +; GFX11-NEXT: scratch_load_u16 v84, off, s32 offset:212 +; GFX11-NEXT: scratch_load_u16 v85, off, s32 offset:204 +; GFX11-NEXT: scratch_load_u16 v86, off, s32 offset:196 +; GFX11-NEXT: scratch_load_u16 v100, off, s32 offset:188 +; GFX11-NEXT: scratch_load_u16 v101, off, s32 offset:180 +; GFX11-NEXT: scratch_load_u16 v102, off, s32 offset:172 +; GFX11-NEXT: scratch_load_u16 v103, off, s32 offset:164 +; GFX11-NEXT: scratch_load_u16 v112, off, s32 offset:156 +; GFX11-NEXT: scratch_load_u16 v118, off, s32 offset:148 +; GFX11-NEXT: scratch_load_u16 v145, off, s32 offset:140 +; GFX11-NEXT: scratch_load_u16 v146, off, s32 offset:132 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v147, off, s32 offset:124 +; GFX11-NEXT: scratch_load_u16 v148, off, s32 offset:116 +; GFX11-NEXT: scratch_load_u16 v162, off, s32 offset:108 +; GFX11-NEXT: scratch_load_u16 v163, off, s32 offset:100 +; GFX11-NEXT: scratch_load_u16 v164, off, s32 offset:92 +; GFX11-NEXT: scratch_load_u16 v165, off, s32 offset:84 +; GFX11-NEXT: 
scratch_load_u16 v166, off, s32 offset:76 +; GFX11-NEXT: scratch_load_u16 v180, off, s32 offset:68 +; GFX11-NEXT: scratch_load_u16 v181, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v182, off, s32 offset:52 +; GFX11-NEXT: scratch_load_u16 v183, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v45, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v46, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v47, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v56, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v62, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v89, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v90, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v91, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v92, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v93, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v76, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v77, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v78, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v79, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v88, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v63, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v72, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v73, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v74, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v75, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(62) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v57, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v58, 8, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v59, 8, v6 +; GFX11-NEXT: v_lshlrev_b32_e32 v60, 8, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v61, 8, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v40, 8, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v41, 8, v14 +; GFX11-NEXT: v_lshlrev_b32_e32 v42, 8, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v43, 8, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v44, 8, v20 +; GFX11-NEXT: v_lshlrev_b32_e32 v167, 8, v22 +; GFX11-NEXT: v_lshlrev_b32_e32 v176, 8, v24 +; GFX11-NEXT: v_lshlrev_b32_e32 v177, 8, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v178, 8, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v179, 
8, v30 +; GFX11-NEXT: v_lshlrev_b32_e32 v149, 8, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v150, 8, v87 +; GFX11-NEXT: s_waitcnt vmcnt(61) +; GFX11-NEXT: v_lshlrev_b32_e32 v151, 8, v96 +; GFX11-NEXT: s_waitcnt vmcnt(60) +; GFX11-NEXT: v_lshlrev_b32_e32 v160, 8, v97 +; GFX11-NEXT: s_waitcnt vmcnt(59) +; GFX11-NEXT: v_lshlrev_b32_e32 v161, 8, v98 +; GFX11-NEXT: s_waitcnt vmcnt(58) +; GFX11-NEXT: v_lshlrev_b32_e32 v132, 8, v99 +; GFX11-NEXT: s_waitcnt vmcnt(57) +; GFX11-NEXT: v_lshlrev_b32_e32 v133, 8, v113 +; GFX11-NEXT: s_waitcnt vmcnt(56) +; GFX11-NEXT: v_lshlrev_b32_e32 v134, 8, v114 +; GFX11-NEXT: s_waitcnt vmcnt(55) +; GFX11-NEXT: v_lshlrev_b32_e32 v135, 8, v115 +; GFX11-NEXT: s_waitcnt vmcnt(54) +; GFX11-NEXT: v_lshlrev_b32_e32 v144, 8, v116 +; GFX11-NEXT: s_waitcnt vmcnt(53) +; GFX11-NEXT: v_lshlrev_b32_e32 v119, 8, v117 +; GFX11-NEXT: s_waitcnt vmcnt(52) +; GFX11-NEXT: v_lshlrev_b32_e32 v128, 8, v128 +; GFX11-NEXT: s_waitcnt vmcnt(51) +; GFX11-NEXT: v_lshlrev_b32_e32 v129, 8, v129 +; GFX11-NEXT: s_waitcnt vmcnt(50) +; GFX11-NEXT: v_lshlrev_b32_e32 v130, 8, v130 +; GFX11-NEXT: s_waitcnt vmcnt(49) +; GFX11-NEXT: v_lshlrev_b32_e32 v131, 8, v131 +; GFX11-NEXT: s_waitcnt vmcnt(48) +; GFX11-NEXT: v_lshlrev_b32_e32 v113, 8, v94 +; GFX11-NEXT: s_waitcnt vmcnt(47) +; GFX11-NEXT: v_lshlrev_b32_e32 v114, 8, v95 +; GFX11-NEXT: s_waitcnt vmcnt(46) +; GFX11-NEXT: v_lshlrev_b32_e32 v115, 8, v104 +; GFX11-NEXT: s_waitcnt vmcnt(45) +; GFX11-NEXT: v_lshlrev_b32_e32 v116, 8, v105 +; GFX11-NEXT: s_waitcnt vmcnt(44) +; GFX11-NEXT: v_lshlrev_b32_e32 v117, 8, v106 +; GFX11-NEXT: s_waitcnt vmcnt(43) +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 8, v107 +; GFX11-NEXT: s_waitcnt vmcnt(42) +; GFX11-NEXT: v_lshlrev_b32_e32 v96, 8, v108 +; GFX11-NEXT: s_waitcnt vmcnt(41) +; GFX11-NEXT: v_lshlrev_b32_e32 v97, 8, v109 +; GFX11-NEXT: s_waitcnt vmcnt(40) +; GFX11-NEXT: v_lshlrev_b32_e32 v98, 8, v110 +; GFX11-NEXT: s_waitcnt vmcnt(39) +; GFX11-NEXT: v_lshlrev_b32_e32 v99, 8, v111 +; GFX11-NEXT: s_and_b32 
s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB75_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v54 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v53 +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: v_or_b32_e32 v0, v0, v90 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v91 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_and_b32 s11, s26, 0xff +; GFX11-NEXT: v_or_b32_e32 v5, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v50 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v49 +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v76 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v77 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v7, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v48 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v39 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v78 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v79 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v8, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v38 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v37 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; 
GFX11-NEXT: v_or_b32_e32 v0, v0, v88 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v63 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v9, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v72 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v73 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v10, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v33 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v74 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v75 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v62 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v57 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v58 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v56 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v47 +; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v59 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v60 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v13, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v46 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v45 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v61 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v40 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v14, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v183 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v182 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v41 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v42 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v15, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v181 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v180 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v43 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v44 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v16, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v166 +; 
GFX11-NEXT: v_and_b32_e32 v1, 0xff, v165 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v167 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v176 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v17, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v164 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v163 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v177 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v178 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v18, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v162 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v148 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v179 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v149 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v19, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v147 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v146 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v150 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v151 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 
v20, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v145 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v118 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v160 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v161 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v21, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v112 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v103 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v132 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v133 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v22, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v102 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v101 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v134 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v135 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v23, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v100 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v86 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v144 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v119 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: 
v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v24, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v85 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v84 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v128 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v129 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v25, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v83 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v82 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v130 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v131 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v26, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v81 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v80 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v113 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v114 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v27, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v71 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v70 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v115 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v116 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v28, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v69 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v68 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v117 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v87 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v29, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v67 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v66 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v96 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v1, v1, v97 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v30, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v65 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v64 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v98 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v99 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_or_b32_e32 v31, v0, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v55 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v89 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; 
GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_lshl_b32 s6, s6, 16 +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_and_b32 s9, s9, 0xffff +; GFX11-NEXT: s_lshl_b32 s10, s10, 16 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v51 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v52 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_or_b32_e32 v3, v3, v93 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v92 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_or_b32_e32 v6, v2, v3 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB75_3 +; GFX11-NEXT: .LBB75_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; 
GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s0, s0, s1 +; GFX11-NEXT: s_and_b32 s1, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s2, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_or_b32 s1, s2, s1 +; GFX11-NEXT: s_and_b32 s2, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s19, 8 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_or_b32 s2, s3, s2 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_and_b32 s3, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s1, s1, s2 +; GFX11-NEXT: s_or_b32 s2, s4, s3 +; GFX11-NEXT: s_and_b32 s3, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s23, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s3, s4, s3 +; GFX11-NEXT: s_and_b32 s4, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s25, 8 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_addk_i32 s3, 0x300 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s2, s2, 0xffff +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s2, s2, s3 +; GFX11-NEXT: s_and_b32 s3, s4, 0xffff +; GFX11-NEXT: s_and_b32 s4, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s27, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v55 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v54 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v52 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v51 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v53 +; GFX11-NEXT: 
v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v38 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v33 +; GFX11-NEXT: v_or_b32_e32 v0, v89, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v90, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v91, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v92, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v93, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v7, v88, v7 +; GFX11-NEXT: v_or_b32_e32 v11, v74, v11 +; GFX11-NEXT: v_or_b32_e32 v12, v75, v12 +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_or_b32_e32 v6, v3, v6 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v49 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v48 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v39 +; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v37 +; GFX11-NEXT: 
v_add_nc_u32_e32 v8, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v35 +; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v12 +; GFX11-NEXT: s_waitcnt vmcnt(3) +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v46 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v181 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v180 +; GFX11-NEXT: v_or_b32_e32 v0, v76, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v77, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v78, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v79, v3 +; GFX11-NEXT: v_or_b32_e32 v7, v63, v7 +; GFX11-NEXT: v_or_b32_e32 v8, v72, v8 +; GFX11-NEXT: v_or_b32_e32 v10, v73, v10 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 0x300, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 +; GFX11-NEXT: v_or_b32_e32 v12, v61, v12 +; GFX11-NEXT: v_or_b32_e32 v16, v43, v16 +; GFX11-NEXT: v_or_b32_e32 v17, v44, v17 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v7 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; 
GFX11-NEXT: v_or_b32_e32 v7, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v8, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v9, v9, v13 +; GFX11-NEXT: v_or_b32_e32 v10, v14, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v32 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v62 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v56 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v47 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v45 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 3, v183 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v182 +; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-NEXT: v_or_b32_e32 v16, v16, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v162 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 3, v145 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v118 +; GFX11-NEXT: v_or_b32_e32 v0, v57, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v58, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v59, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v60, v3 +; GFX11-NEXT: v_or_b32_e32 v12, v40, v12 +; GFX11-NEXT: v_or_b32_e32 v13, v41, v13 +; GFX11-NEXT: v_or_b32_e32 v15, v42, v15 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_and_b32_e32 v21, 0xff, v21 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v13 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 +; GFX11-NEXT: v_or_b32_e32 v17, v179, v17 +; GFX11-NEXT: v_or_b32_e32 v21, v160, v21 +; GFX11-NEXT: v_or_b32_e32 v22, v161, v22 +; 
GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v18, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v15, 16, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 0x300, v21 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_or_b32_e32 v12, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v13, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v18 +; GFX11-NEXT: v_or_b32_e32 v15, v19, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v166 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v165 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v164 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v163 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 3, v148 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 3, v147 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v146 +; GFX11-NEXT: v_and_b32_e32 v21, 0xffff, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18 +; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20 +; GFX11-NEXT: v_or_b32_e32 v21, v21, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v100 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 3, v83 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v82 +; GFX11-NEXT: v_or_b32_e32 v0, v167, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v176, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v177, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v178, v3 +; GFX11-NEXT: v_or_b32_e32 v17, v149, v17 +; GFX11-NEXT: v_or_b32_e32 v18, v150, v18 +; GFX11-NEXT: v_or_b32_e32 v20, v151, v20 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 
v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v17 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v20 +; GFX11-NEXT: v_or_b32_e32 v22, v144, v22 +; GFX11-NEXT: v_or_b32_e32 v26, v130, v26 +; GFX11-NEXT: v_or_b32_e32 v27, v131, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 16, v17 +; GFX11-NEXT: v_and_b32_e32 v24, 0xffff, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 0x300, v26 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_or_b32_e32 v17, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v18, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v19, v19, v23 +; GFX11-NEXT: v_or_b32_e32 v20, v24, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v112 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v103 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v102 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v101 +; GFX11-NEXT: v_and_b32_e32 v24, 0xffff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 3, v86 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 3, v85 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 3, v84 +; GFX11-NEXT: v_and_b32_e32 v26, 0xffff, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 16, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v23, 0xff, v23 +; GFX11-NEXT: v_and_b32_e32 v25, 0xff, v25 +; GFX11-NEXT: v_or_b32_e32 v26, v26, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v69 +; GFX11-NEXT: v_or_b32_e32 v0, v132, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v133, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v134, 
v2 +; GFX11-NEXT: v_or_b32_e32 v3, v135, v3 +; GFX11-NEXT: v_or_b32_e32 v22, v119, v22 +; GFX11-NEXT: v_or_b32_e32 v23, v128, v23 +; GFX11-NEXT: v_or_b32_e32 v25, v129, v25 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v23 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 0x300, v25 +; GFX11-NEXT: v_or_b32_e32 v27, v117, v27 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v28, 16, v22 +; GFX11-NEXT: v_and_b32_e32 v29, 0xffff, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_or_b32_e32 v22, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v23, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v24, v24, v28 +; GFX11-NEXT: v_or_b32_e32 v25, v29, v25 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v81 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v80 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v71 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v70 +; GFX11-NEXT: v_and_b32_e32 v29, 0xffff, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 3, v68 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 3, v67 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 3, v66 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 3, v65 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 3, v64 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v27 +; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28 +; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30 +; GFX11-NEXT: v_and_b32_e32 v31, 0xff, v31 +; GFX11-NEXT: v_and_b32_e32 v32, 0xff, v32 +; GFX11-NEXT: v_or_b32_e32 v0, v113, v0 +; GFX11-NEXT: 
v_or_b32_e32 v1, v114, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v115, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v116, v3 +; GFX11-NEXT: v_or_b32_e32 v27, v87, v27 +; GFX11-NEXT: v_or_b32_e32 v28, v96, v28 +; GFX11-NEXT: v_or_b32_e32 v30, v97, v30 +; GFX11-NEXT: v_or_b32_e32 v31, v98, v31 +; GFX11-NEXT: v_or_b32_e32 v32, v99, v32 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v27 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 0x300, v28 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 0x300, v30 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 0x300, v31 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 0x300, v32 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v33, 16, v27 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v32, 16, v32 +; GFX11-NEXT: v_or_b32_e32 v27, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v28, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v29, v29, v33 +; GFX11-NEXT: v_or_b32_e32 v30, v34, v30 +; GFX11-NEXT: v_or_b32_e32 v31, v31, v32 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB75_3: ; %end +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:320 +; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:324 +; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:328 +; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:332 +; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:336 +; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:340 +; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:344 +; GFX11-NEXT: scratch_load_b32 v104, off, s32 
offset:348 +; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:352 +; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:356 +; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:360 +; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:364 +; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:368 +; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:372 +; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:376 +; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:380 +; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:384 +; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:388 +; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:392 +; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:396 +; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:400 +; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:404 +; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:408 +; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:412 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:416 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:420 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:424 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:428 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:432 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:436 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:440 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:444 +; GFX11-NEXT: s_clause 0x7 +; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:448 +; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:452 +; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:456 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:460 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:464 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:468 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:472 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:476 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB75_4: +; GFX11-NEXT: ; implicit-def: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-NEXT: s_branch .LBB75_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -147422,766 +143970,814 @@ define <64 x bfloat> @bitcast_v128i8_to_v64bf16(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v150, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v150, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v147, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v149, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v149, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v148, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v148, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v147, off, s32 
offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v146, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v146, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v145, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v134, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v144, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:260 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v74, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:392 ; 4-byte Folded Spill +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_u16 v31, off, s32 offset:384 +; GFX11-TRUE16-NEXT: scratch_load_u16 v114, off, s32 offset:380 +; GFX11-TRUE16-NEXT: scratch_load_u16 v103, off, s32 offset:376 +; GFX11-TRUE16-NEXT: scratch_load_u16 v128, off, s32 offset:372 +; GFX11-TRUE16-NEXT: scratch_load_u16 v56, off, s32 offset:368 +; GFX11-TRUE16-NEXT: scratch_load_u16 v117, off, s32 offset:364 +; GFX11-TRUE16-NEXT: scratch_load_u16 v102, off, s32 offset:360 +; GFX11-TRUE16-NEXT: scratch_load_u16 v133, off, s32 offset:356 +; GFX11-TRUE16-NEXT: scratch_load_u16 v57, off, s32 offset:352 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:348 +; GFX11-TRUE16-NEXT: scratch_load_u16 v58, off, s32 offset:344 +; GFX11-TRUE16-NEXT: scratch_load_u16 v132, off, s32 offset:340 +; GFX11-TRUE16-NEXT: scratch_load_u16 v101, off, s32 offset:336 +; GFX11-TRUE16-NEXT: scratch_load_u16 v118, off, s32 offset:332 +; GFX11-TRUE16-NEXT: scratch_load_u16 v59, off, s32 offset:328 +; GFX11-TRUE16-NEXT: scratch_load_u16 
v134, off, s32 offset:324 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:320 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:316 +; GFX11-TRUE16-NEXT: scratch_load_u16 v100, off, s32 offset:312 +; GFX11-TRUE16-NEXT: scratch_load_u16 v145, off, s32 offset:308 +; GFX11-TRUE16-NEXT: scratch_load_u16 v99, off, s32 offset:304 +; GFX11-TRUE16-NEXT: scratch_load_u16 v119, off, s32 offset:300 +; GFX11-TRUE16-NEXT: scratch_load_u16 v60, off, s32 offset:296 +; GFX11-TRUE16-NEXT: scratch_load_u16 v135, off, s32 offset:292 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:288 +; GFX11-TRUE16-NEXT: scratch_load_u16 v129, off, s32 offset:284 +; GFX11-TRUE16-NEXT: scratch_load_u16 v61, off, s32 offset:280 +; GFX11-TRUE16-NEXT: scratch_load_u16 v146, off, s32 offset:276 +; GFX11-TRUE16-NEXT: scratch_load_u16 v62, off, s32 offset:272 +; GFX11-TRUE16-NEXT: scratch_load_u16 v115, off, s32 offset:268 +; GFX11-TRUE16-NEXT: scratch_load_u16 v63, off, s32 offset:264 +; GFX11-TRUE16-NEXT: scratch_load_u16 v144, off, s32 offset:260 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v144, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v135, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v132, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v135, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v134, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v133, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v160, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v103, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v113, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v113, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v114, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v115, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v116, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v116, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v117, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v117, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v118, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v119, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v119, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v128, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v128, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v129, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v130, off, s32 offset:152 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:256 +; GFX11-TRUE16-NEXT: scratch_load_u16 v130, off, s32 offset:252 +; GFX11-TRUE16-NEXT: scratch_load_u16 v72, off, s32 offset:248 +; GFX11-TRUE16-NEXT: scratch_load_u16 v147, off, s32 offset:244 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:240 +; GFX11-TRUE16-NEXT: scratch_load_u16 v116, off, s32 offset:236 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 offset:232 +; GFX11-TRUE16-NEXT: scratch_load_u16 v148, off, s32 offset:228 +; GFX11-TRUE16-NEXT: scratch_load_u16 v86, off, s32 offset:224 +; GFX11-TRUE16-NEXT: scratch_load_u16 v131, off, s32 offset:220 +; GFX11-TRUE16-NEXT: 
scratch_load_u16 v73, off, s32 offset:216 +; GFX11-TRUE16-NEXT: scratch_load_b32 v65, off, s32 offset:388 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v70, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_u16 v74, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v75, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v76, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v77, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v78, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v79, off, s32 offset:136 +; GFX11-TRUE16-NEXT: scratch_load_u16 v88, off, s32 offset:144 +; GFX11-TRUE16-NEXT: scratch_load_u16 v89, off, s32 offset:152 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v130, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v131, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v132, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v133, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v151, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v151, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v100, off, s32 offset:212 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v98, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v85, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v87, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v80, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v86, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v96, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v84, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v83, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v90, off, s32 offset:160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v91, off, s32 offset:168 +; GFX11-TRUE16-NEXT: scratch_load_u16 v92, off, s32 offset:176 +; GFX11-TRUE16-NEXT: scratch_load_u16 v93, off, s32 offset:184 +; GFX11-TRUE16-NEXT: scratch_load_u16 v94, off, s32 offset:192 +; GFX11-TRUE16-NEXT: scratch_load_u16 v95, 
off, s32 offset:200 +; GFX11-TRUE16-NEXT: scratch_load_u16 v104, off, s32 offset:208 +; GFX11-TRUE16-NEXT: scratch_load_u16 v179, off, s32 offset:212 +; GFX11-TRUE16-NEXT: scratch_load_u16 v163, off, s32 offset:204 +; GFX11-TRUE16-NEXT: scratch_load_u16 v183, off, s32 offset:196 +; GFX11-TRUE16-NEXT: scratch_load_u16 v150, off, s32 offset:188 +; GFX11-TRUE16-NEXT: scratch_load_u16 v167, off, s32 offset:180 +; GFX11-TRUE16-NEXT: scratch_load_u16 v161, off, s32 offset:172 +; GFX11-TRUE16-NEXT: scratch_load_u16 v177, off, s32 offset:164 +; GFX11-TRUE16-NEXT: scratch_load_u16 v149, off, s32 offset:156 +; GFX11-TRUE16-NEXT: scratch_load_u16 v180, off, s32 offset:148 +; GFX11-TRUE16-NEXT: scratch_load_u16 v151, off, s32 offset:140 +; GFX11-TRUE16-NEXT: scratch_load_u16 v164, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v41, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v47, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v165, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v43, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v181, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v45, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v162, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v46, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v176, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v42, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v178, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v44, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v160, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v182, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v54.l, v30.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v67.l, v28.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 
v64.h, v26.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v65.h, v24.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v65.l, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.h, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.h, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.l, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v54.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.h, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v19.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.h, 8, v21.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v23.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v25.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v27.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.l, 8, v29.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.h, 8, v150.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.l, 8, v150.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.h, 8, v147.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v149.h, 8, v149.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v149.l, 8, v149.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.h, 8, v148.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v145.l, 8, v145.l -; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.l, 8, v148.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.l, 8, v147.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v146.h, 8, v146.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v146.l, 8, v146.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v145.h, 8, v145.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v134.h, 8, v134.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.h, 8, v144.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.l, 8, v144.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.h, 8, v135.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.l, 8, v132.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.l, 8, v135.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v134.l, 8, v134.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.h, 8, v133.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v166, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v40, off, s32 offset:4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v30.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v28.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v26.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v24.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v3.l +; GFX11-TRUE16-NEXT: 
v_lshlrev_b16 v49.h, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v19.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v21.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v23.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v25.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v27.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v29.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v31.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v103.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v56.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v102.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v57.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.h, 8, v58.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.l, 8, v101.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.h, 8, v103.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.l, 8, v113.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.h, 8, v113.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v114.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v115.l, 8, v114.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v115.h, 8, v115.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: 
v_lshlrev_b16 v116.l, 8, v116.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.h, 8, v116.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.l, 8, v117.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.l, 8, v117.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v59.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v97.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.l, 8, v100.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v99.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v60.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.l, 8, v98.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v61.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v86.h, 8, v62.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.h, 8, v63.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.l, 8, v96.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v72.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.l, 8, v84.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v87.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v86.l, 8, v86.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.l, 8, v118.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v73.l +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v65 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v53.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v66.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.l, 8, v67.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.h, 8, v68.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v69.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.l, 8, v70.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v71.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v80.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.h, 8, v81.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.l, 8, v82.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v83.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v85.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.h, 8, v118.h +; GFX11-TRUE16-NEXT: 
v_lshlrev_b16 v70.h, 8, v74.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v119.l, 8, v119.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.l, 8, v75.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.h, 8, v119.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v76.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(38) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v119.h, 8, v128.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.h, 8, v77.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.l, 8, v128.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.l, 8, v78.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(36) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.h, 8, v129.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.h, 8, v79.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.l, 8, v129.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v88.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(34) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v130.l, 8, v130.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.l, 8, v89.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.h, 8, v130.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.h, 8, v90.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v130.h, 8, v131.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.h, 8, v91.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.l, 8, v131.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v92.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(30) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.h, 8, v132.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v93.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.h, 8, v133.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v94.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(28) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.h, 8, v151.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.h, 8, v95.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.l, 8, v151.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.l, 8, v31.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.h, 8, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v104.l ; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB88_3 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow -; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB88_4 -; GFX11-TRUE16-NEXT: .LBB88_2: ; %end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB88_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v52.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v51.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v53.l -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v51.l -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v54.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v55.l -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v53.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v52.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v65.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v64.l -; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v65.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v67.l -; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v54.l +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB88_2 +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v34.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v33.l +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v34.h +; GFX11-TRUE16-NEXT: v_and_b16 
v1.h, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v36.l +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v35.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v37.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v38.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v37.h +; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v39.l +; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v38.l +; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v35.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v67.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v66.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v68.l -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v66.l -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v70.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v68.h -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v71.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v69.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v83.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v69.l -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v84.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v82.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v85.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v80.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v96.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v86.h -; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v84.l -; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v80.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v87.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v71.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v96.l -; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v86.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v97.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v85.h -; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v100.l -; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v98.l -; 
GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v100.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v48.l -; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v50.l -; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v50.h -; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v39.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v81.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v81.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v82.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v1.h, v83.l -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v2.l, v97.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v70.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v3.l, v98.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v99.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v99.h -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v87.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v101.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v102.l -; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v102.h -; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v103.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v112.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v101.l -; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v112.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, 
v8.h, v113.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v113.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v103.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v115.l -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v115.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v116.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v116.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v117.l -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v114.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v118.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v118.h -; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v119.l -; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v114.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v119.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v128.l -; GFX11-TRUE16-NEXT: v_or_b16 v16.l, v16.l, v128.h -; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v129.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v17.l, v130.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v117.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.l, v18.l, v130.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v131.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v19.l, v131.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v129.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.l, v20.l, v132.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v133.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v21.l, v133.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v134.l -; GFX11-TRUE16-NEXT: v_or_b16 v22.l, v22.l, v135.l -; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v132.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v23.l, v135.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v144.l -; GFX11-TRUE16-NEXT: v_or_b16 v24.l, v24.l, v144.h -; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v134.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v25.l, v145.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v146.l -; GFX11-TRUE16-NEXT: v_or_b16 v26.l, v26.l, v146.h -; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v147.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v27.l, v148.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v145.l -; 
GFX11-TRUE16-NEXT: v_or_b16 v28.l, v28.l, v148.h -; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v149.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v29.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v147.h -; GFX11-TRUE16-NEXT: v_or_b16 v30.l, v30.l, v150.l -; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v150.h -; GFX11-TRUE16-NEXT: v_or_b16 v31.l, v31.l, v151.l -; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v151.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v40.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v166.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v182.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v160.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v44.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v178.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v42.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v176.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v46.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v162.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v45.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v181.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v43.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v165.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v47.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v41.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v164.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v151.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v180.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v149.l +; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v177.l +; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v161.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v167.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v150.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v183.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v163.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v179.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 
0xff, v131.l +; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v148.l +; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v116.l +; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v147.l +; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v130.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v144.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v115.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v146.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v129.l +; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v135.l +; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v119.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v145.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v113.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v134.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v118.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v132.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v112.l +; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v133.l +; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v117.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v128.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v114.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v48.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v49.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v49.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v1.h, v50.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v2.l, v51.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v48.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v3.l, v51.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v52.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v52.h +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v50.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v53.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v54.l +; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v54.h +; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v55.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v64.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v53.l +; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v65.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v65.h +; 
GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v55.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v67.l +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v67.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v68.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v68.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v69.l +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v66.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v70.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v70.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v71.l +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v66.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v71.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v80.l +; GFX11-TRUE16-NEXT: v_or_b16 v16.l, v16.l, v80.h +; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v81.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v17.l, v82.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v69.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.l, v18.l, v82.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v83.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v19.l, v83.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v81.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.l, v20.l, v84.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v85.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v21.l, v85.h +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v86.l +; GFX11-TRUE16-NEXT: v_or_b16 v22.l, v22.l, v87.l +; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v84.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v23.l, v87.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v96.l +; GFX11-TRUE16-NEXT: v_or_b16 v24.l, v24.l, v96.h +; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v86.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v25.l, v97.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v98.l +; GFX11-TRUE16-NEXT: v_or_b16 v26.l, v26.l, v98.h +; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v99.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v27.l, v100.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v97.l +; GFX11-TRUE16-NEXT: v_or_b16 v28.l, v28.l, v100.h +; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v101.l +; 
GFX11-TRUE16-NEXT: v_or_b16 v29.l, v29.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v99.h +; GFX11-TRUE16-NEXT: v_or_b16 v30.l, v30.l, v102.l +; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v102.h +; GFX11-TRUE16-NEXT: v_or_b16 v31.l, v31.l, v103.l +; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v103.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr116 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr179 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr163 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr183 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr167 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr161 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr177 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr180 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr164 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr41 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr47 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr165 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr43 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr181 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr162 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr46 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr176 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr42 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr178 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr44 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr160 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr182 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr40 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 ; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_hi16 -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr82_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_hi16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_hi16 +; GFX11-TRUE16-NEXT: .LBB88_2: ; %Flow ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB88_2 -; GFX11-TRUE16-NEXT: .LBB88_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v50.h, 3 
-; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v39.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v50.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v39.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v48.l, 3 +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB88_4 +; GFX11-TRUE16-NEXT: ; %bb.3: ; %cmp.true +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v128.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v114.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v133.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v145.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v151.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v151.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v150.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v150.h, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v148.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v103.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v103.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v102.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v102.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v100.l, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v49.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v38.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v132.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v134.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v118.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.l, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, 
v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v36.l, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v149.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v147.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v148.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v149.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v129.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v101.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v99.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v100.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v101.l, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v34.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v38.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v37.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v146.l, v2.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v113.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v135.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v119.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v146.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v98.l, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.h, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v145.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v146.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v147.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v145.h, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v34.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v97.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v98.h, 
v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v99.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v97.h, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v148.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.l, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v35.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v144.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v147.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v130.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v135.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v144.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v134.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v135.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v144.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v87.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v96.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v86.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v87.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v96.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v116.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(26) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v100.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 
v1.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v179.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v131.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(24) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v100.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v98.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v183.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v163.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v132.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v133.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v134.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v132.h, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v133.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v84.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v85.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v86.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v84.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v85.l, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(22) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v97.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v85.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v167.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v150.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v96.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v86.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v177.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v161.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; 
GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v87.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v131.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v129.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v130.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v131.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v180.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v83.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v81.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v82.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v83.l, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v71.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v149.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v84.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v80.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v164.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v151.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v96.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v130.l, v2.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v47.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v82.l, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v117.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v128.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v129.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v119.h, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v86.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v69.h, v0.l +; GFX11-TRUE16-NEXT: 
v_or_b16 v0.h, v80.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v81.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v71.h, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v41.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v85.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v80.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v43.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v165.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v84.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v82.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v45.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v181.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v128.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v119.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v114.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v118.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v118.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v80.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v71.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v66.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v70.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v70.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v2.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v83.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v46.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 
0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v69.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v162.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v71.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v69.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v42.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v176.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v70.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v44.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v117.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v114.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v116.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v116.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v115.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v69.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v66.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v68.l, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v68.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v67.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v68.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v178.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v68.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v66.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v182.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v160.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v67.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v66.h, 3 +; GFX11-TRUE16-NEXT: 
v_add_nc_u16 v1.l, v40.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v166.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v115.h, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v113.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v103.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v112.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v113.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v67.h, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v65.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v55.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v64.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v65.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v39.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v54.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v65.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v64.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v35.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v39.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v38.h, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v112.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v101.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v102.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 
v1.l, v103.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v101.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v64.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v53.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v54.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v55.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v64.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v37.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.l, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v52.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v55.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v53.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v35.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v102.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v99.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v87.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v98.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v99.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v54.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v52.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v50.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v51.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v52.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v54.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v36.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, 
0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v51.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v52.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v51.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v34.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v34.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v97.h, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v70.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v82.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v83.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v81.l, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v81.h, v2.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v51.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v48.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v49.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v50.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v48.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v49.l, v2.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, 0x300, v32.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x300, v32.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, 0x300, v33.l +; GFX11-TRUE16-NEXT: .LBB88_4: ; %end ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; 
GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:392 +; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:512 +; 
GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:520 ; 4-byte Folded Reload +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v64bf16: @@ -152952,1657 +149548,831 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: bitcast_v128i8_to_v64bf16_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1e -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 
offset:360 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v22 :: v_dual_mov_b32 v48, v20 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v14 :: v_dual_mov_b32 v34, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v12 :: v_dual_mov_b32 v38, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v2 :: v_dual_mov_b32 v32, v0 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v2, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v16, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v18, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v20, off, s32 offset:72 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v22, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v24, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v26, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v28, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v30, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v41, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v44, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v45, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v56, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v59, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v60, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v61, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v62, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v63, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v72, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v73, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v74, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v75, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v77, off, s32 offset:240 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v78, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v79, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v88, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v89, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v90, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v91, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v92, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v93, off, s32 
offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v94, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v57, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v58, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v40, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v43, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v182, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v183, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v179, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v181, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v176, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v160, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v146, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v150, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v135, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v119, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v178, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:132 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v149, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v151, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v144, off, s32 offset:100 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v148, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v133, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v117, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v116, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v68, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v67, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v70, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v69, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v80, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v82, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v86, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v97, 8, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 8, v4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v101, 8, v6 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v102, 8, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v113, 8, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 
v112, 8, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v130, 8, v14 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v128, 8, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v134, 8, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v132, 8, v20 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v161, 8, v22 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v147, 8, v24 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v166, 8, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v167, 8, v28 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v180, 8, v30 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v177, 8, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v42, 8, v41 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v41, 8, v44 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v45, 8, v45 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v44, 8, v56 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v59, 8, v59 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v56, 8, v60 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v60, 8, v61 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v61, 8, v62 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v63, 8, v63 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v62, 8, v72 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v73, 8, v73 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v72, 8, v74 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v75, 8, v75 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v74, 8, v76 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v77, 8, v77 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v76, 
8, v78 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v78, 8, v79 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v79, 8, v88 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v89, 8, v89 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v88, 8, v90 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v91, 8, v91 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v90, 8, v92 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v92, 8, v93 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v93, 8, v94 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB89_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e64 v5, 0xffff, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: 
s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v68 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v4, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v65 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v0, 16, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v39 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v6, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v49 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v50 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v48 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v7, v80 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v8, v81 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v53 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v9, 16, v2 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v55 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v51 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v10, 16, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v96 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v85 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v10, v97 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v11, v87 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v99 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v3, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v103 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v114 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v98 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v0, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v113 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v101 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v116 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v14, v128 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v112 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v117 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v102 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v130 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v133 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v14, v132 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v0, 16, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v148 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v118 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xff, v129 
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v16, v161 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v2, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v166 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v144 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v134 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v18, v147 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v167 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v17, 16, v19 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v17, v18, 16, v22 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s7 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v18, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v151 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v149 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v16, v20, 16, v21 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, s8 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v19, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v165 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v162 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v42 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v41 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v20, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v115 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v45 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v21, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v119 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v56 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v22, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v135 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v60 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v61 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v23, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v150 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v63 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v62 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v24, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v163 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v160 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v0, v0, v73 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v72 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v25, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v176 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v164 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v75 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v74 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v26, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v179 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v77 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v76 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v27, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v28, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v43 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v40 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-TRUE16-NEXT: v_or_b32_e32 
v1, v1, v88 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v29, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v46 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v91 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v90 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v30, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v58 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v57 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v92 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v93 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v31, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB89_3 -; GFX11-TRUE16-NEXT: .LBB89_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s7, 
s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v58 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v57 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v92, v0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, 
v46 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v93, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v43 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v40 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v90, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v183 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v89, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v181 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v88, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v181, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v78, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v79, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v29, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v179 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v182, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v179, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v77, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v176 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v164 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v163 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v163, 0x300, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v160 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v75, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v160, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v74, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v73, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v150 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v72, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v146 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v145 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v135 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v63, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v131 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v62, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v131, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v60, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v61, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v24, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v119 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v135, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v119, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v59, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v115 -; GFX11-TRUE16-NEXT: s_waitcnt 
vmcnt(15) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v165 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v115, 0x300, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v56, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v162 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v45, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v145, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v44, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v42, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v151 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v41, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v149 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v148 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v144 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v180, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v133 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v177, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v166, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v167, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v133, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v129 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v129, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v144, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, 
v161, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v118 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v117 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v116 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v116, 0x300, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v147, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v114 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v99 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v114, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v132, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v130, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v103 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v98 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v54 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v39 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 3, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v113, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v128, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v100 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v101, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v102, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v96 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v134, v1 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v97, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v55 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v87, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v51 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v86, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v85, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v84, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v51, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v50, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v49 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v83, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v82, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v38 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v81, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v38, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v71, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v80, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v37 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v37, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v39, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v70, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 3, v35 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v69, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v34 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v112, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v68, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v67, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v66, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v32 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v65, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v36, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v10, 16, v36 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v36, 0xffff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v32, 16, v7 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 
v4, v4, 16, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v51 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v38 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v34, 16, v8 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v39, 16, v33 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v50, 16, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v9, 16, v35 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v15, 16, v14 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v13, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v3, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v16, v16, 16, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v116 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v129 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v26, v26, 16, v36 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v17, v114, 16, v32 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v18, v144, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v20, v20, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v21, v21, 16, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v115 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v135 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v36, 0xffff, v27 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v22, v145, 16, v32 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v23, v119, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v24, v24, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v25, v25, 16, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v163 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v181 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v28 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v2, 16, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v19, v133, 16, v19 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v27, v160, 16, v32 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v28, v179, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v29, v29, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v30, v30, 16, v35 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v31, v31, 16, v36 -; GFX11-TRUE16-NEXT: .LBB89_3: ; %end -; GFX11-TRUE16-NEXT: s_clause 0x1e -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:400 -; 
GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:440 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB89_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: s_branch .LBB89_2 -; -; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v64bf16_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x1e -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 -; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v53, v22 :: v_dual_mov_b32 v48, v20 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v50, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v49, v14 :: v_dual_mov_b32 v34, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v37, v12 :: v_dual_mov_b32 v38, v10 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v35, v2 :: v_dual_mov_b32 v32, v0 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-FAKE16-NEXT: 
scratch_load_u16 v2, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v16, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v18, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_u16 v20, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_u16 v22, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_u16 v24, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_u16 v26, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_u16 v28, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_u16 v30, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_u16 v31, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_u16 v41, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_u16 v44, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_u16 v45, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_u16 v56, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_u16 v59, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_u16 v60, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_u16 v61, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_u16 v62, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_u16 v63, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_u16 v72, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_u16 v73, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_u16 v74, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_u16 v75, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_u16 v76, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_u16 v77, off, s32 offset:240 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: 
scratch_load_u16 v78, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_u16 v79, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_u16 v88, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_u16 v89, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_u16 v90, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_u16 v91, off, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_load_u16 v92, off, s32 offset:296 -; GFX11-FAKE16-NEXT: scratch_load_u16 v93, off, s32 offset:304 -; GFX11-FAKE16-NEXT: scratch_load_u16 v94, off, s32 offset:312 -; GFX11-FAKE16-NEXT: scratch_load_u16 v57, off, s32 offset:308 -; GFX11-FAKE16-NEXT: scratch_load_u16 v58, off, s32 offset:300 -; GFX11-FAKE16-NEXT: scratch_load_u16 v46, off, s32 offset:292 -; GFX11-FAKE16-NEXT: scratch_load_u16 v47, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_u16 v40, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_u16 v43, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_u16 v182, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_u16 v183, off, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_load_u16 v178, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_u16 v181, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_u16 v164, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_u16 v176, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_u16 v160, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_u16 v163, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_u16 v146, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_u16 v150, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_u16 v135, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_u16 v145, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_u16 v118, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_u16 v131, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_u16 v115, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_u16 v179, off, s32 offset:140 -; GFX11-FAKE16-NEXT: 
scratch_load_u16 v162, off, s32 offset:132 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v165, off, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_load_u16 v149, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_u16 v151, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_u16 v144, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_u16 v148, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_u16 v129, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_u16 v133, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_u16 v117, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_u16 v119, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v114, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_u16 v116, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v100, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v103, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v98, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v99, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v96, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v64, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v65, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v68, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v69, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v80, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v81, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 
vcc_lo, 0, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 8, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 8, v4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v101, 8, v6 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v102, 8, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v113, 8, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v112, 8, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v130, 8, v14 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v128, 8, v16 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 8, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 8, v20 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v161, 8, v22 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v147, 8, v24 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v166, 8, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v167, 8, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v180, 8, v30 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v177, 8, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v42, 8, v41 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v41, 8, v44 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v45, 8, v45 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v44, 8, v56 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v59, 8, v59 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v56, 8, v60 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v60, 8, v61 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v61, 8, v62 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v63, 8, v63 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v62, 8, v72 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v73, 8, v73 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v72, 8, v74 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(50) -; 
GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v75, 8, v75 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v74, 8, v76 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v77, 8, v77 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v76, 8, v78 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v78, 8, v79 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v79, 8, v88 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v89, 8, v89 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v88, 8, v90 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v91, 8, v91 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v90, 8, v92 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v92, 8, v93 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v93, 8, v94 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB89_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e64 v5, 0xffff, s5 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; 
GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s8, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v68 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v4, v67 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v6, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v49 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v50 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v48 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v82 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v9, v7, v80 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v8, v81 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v53 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v9, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v55 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v51 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v10, 16, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v96 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v85 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v10, v97 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v11, v87 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v99 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v3, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v103 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v114 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v98 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v0, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v113 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v101 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v116 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v14, v128 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v112 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v117 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v102 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v13, v13, v130 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v133 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v14, v132 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v0, 16, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v148 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v119 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xff, v129 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v16, v161 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v2, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v166 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v144 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v134 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v18, v147 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v167 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v17, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v18, 16, v22 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, s7 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v151 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v149 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v20, 16, v21 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, s8 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v177 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v165 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v162 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v42 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v41 -; GFX11-FAKE16-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v179 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v115 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v45 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v56 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v135 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v60 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v61 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v150 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v63 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v62 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v163 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v160 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v73 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v72 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v176 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v164 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v75 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v74 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v178 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v77 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v76 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 
0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v43 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v40 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v88 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v46 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v91 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v90 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v58 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v57 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v92 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v93 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB89_3 -; GFX11-FAKE16-NEXT: .LBB89_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s24, 0xff -; 
GFX11-FAKE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v58 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v57 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 
v0, 0xff, v0 -; GFX11-FAKE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v92, v0 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v46 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v93, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v43 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v40 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v90, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v183 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v89, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v181 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v88, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v181, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v78, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v79, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v29, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v178 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v182, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v178, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v77, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v176 -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v164 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v163 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v163, 0x300, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v160 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v75, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v160, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v74, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v73, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v150 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v72, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v146 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v145 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v135 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v63, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v131 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v62, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v131, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v60, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v61, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v118 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v135, 
0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v118, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v59, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v179 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v115 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v165 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v115, 0x300, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v56, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v162 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v45, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v145, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v44, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v42, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v151 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v41, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v149 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v148 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v144 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v180, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v133 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v177, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v166, v2 -; 
GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v167, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v133, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v129 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v129, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v144, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v161, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v119 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v117 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v116 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v116, 0x300, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v147, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v114 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v99 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v114, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v132, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v130, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v103 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v98 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v54 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v39 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 3, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v113, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v128, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v100 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v101, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v102, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v96 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v134, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v97, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v55 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v87, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v51 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v86, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v85, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v84, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v51, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v50, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v49 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v83, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v82, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v38 -; 
GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v81, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v71, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v80, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v70, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 3, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v69, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v112, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v68, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v67, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v66, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v65, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v10, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 
v5, v7, 16, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v32, 16, v7 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v51 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v34, 16, v8 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v39, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v50, 16, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v9, 16, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v15, 16, v14 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v13, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v3, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v16, 16, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v116 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v129 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v26, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v114, 16, v32 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v144, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v20, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v21, 16, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v115 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v135 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v27 -; 
GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v145, 16, v32 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v118, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v24, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v25, 16, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v163 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v28 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v2, 16, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v133, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v160, 16, v32 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v178, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v29, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v30, 16, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v31, 16, v36 -; GFX11-FAKE16-NEXT: .LBB89_3: ; %end -; GFX11-FAKE16-NEXT: s_clause 0x1e -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:320 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:376 -; 
GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:440 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB89_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: s_branch .LBB89_2 +; GFX11-LABEL: bitcast_v128i8_to_v64bf16_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x1e +; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-NEXT: 
scratch_store_b32 off, v46, s32 offset:416 +; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:320 +; GFX11-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 +; GFX11-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 +; GFX11-NEXT: v_dual_mov_b32 v53, v22 :: v_dual_mov_b32 v48, v20 +; GFX11-NEXT: v_dual_mov_b32 v50, v18 :: v_dual_mov_b32 v39, v16 +; GFX11-NEXT: v_dual_mov_b32 v49, v14 :: v_dual_mov_b32 v34, v8 +; GFX11-NEXT: v_dual_mov_b32 v37, v12 :: v_dual_mov_b32 v38, v10 +; GFX11-NEXT: v_dual_mov_b32 v36, v6 :: v_dual_mov_b32 v33, v4 +; GFX11-NEXT: v_dual_mov_b32 v35, v2 :: v_dual_mov_b32 v32, v0 +; GFX11-NEXT: s_clause 0x1f +; 
GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:316 +; GFX11-NEXT: scratch_load_u16 v2, off, s32 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v16, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v18, off, s32 offset:64 +; GFX11-NEXT: scratch_load_u16 v20, off, s32 offset:72 +; GFX11-NEXT: scratch_load_u16 v22, off, s32 offset:80 +; GFX11-NEXT: scratch_load_u16 v24, off, s32 offset:88 +; GFX11-NEXT: scratch_load_u16 v26, off, s32 offset:96 +; GFX11-NEXT: scratch_load_u16 v28, off, s32 offset:104 +; GFX11-NEXT: scratch_load_u16 v30, off, s32 offset:112 +; GFX11-NEXT: scratch_load_u16 v31, off, s32 offset:120 +; GFX11-NEXT: scratch_load_u16 v41, off, s32 offset:128 +; GFX11-NEXT: scratch_load_u16 v44, off, s32 offset:136 +; GFX11-NEXT: scratch_load_u16 v45, off, s32 offset:144 +; GFX11-NEXT: scratch_load_u16 v56, off, s32 offset:152 +; GFX11-NEXT: scratch_load_u16 v59, off, s32 offset:160 +; GFX11-NEXT: scratch_load_u16 v60, off, s32 offset:168 +; GFX11-NEXT: scratch_load_u16 v61, off, s32 offset:176 +; GFX11-NEXT: scratch_load_u16 v62, off, s32 offset:184 +; GFX11-NEXT: scratch_load_u16 v63, off, s32 offset:192 +; GFX11-NEXT: scratch_load_u16 v72, off, s32 offset:200 +; GFX11-NEXT: scratch_load_u16 v73, off, s32 offset:208 +; GFX11-NEXT: scratch_load_u16 v74, off, s32 offset:216 +; GFX11-NEXT: scratch_load_u16 v75, off, s32 offset:224 +; GFX11-NEXT: scratch_load_u16 v76, off, s32 offset:232 +; GFX11-NEXT: scratch_load_u16 v77, off, s32 offset:240 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_u16 v78, off, s32 offset:248 +; GFX11-NEXT: scratch_load_u16 v79, off, s32 offset:256 +; GFX11-NEXT: scratch_load_u16 v88, off, s32 offset:264 +; 
GFX11-NEXT: scratch_load_u16 v89, off, s32 offset:272 +; GFX11-NEXT: scratch_load_u16 v90, off, s32 offset:280 +; GFX11-NEXT: scratch_load_u16 v91, off, s32 offset:288 +; GFX11-NEXT: scratch_load_u16 v92, off, s32 offset:296 +; GFX11-NEXT: scratch_load_u16 v93, off, s32 offset:304 +; GFX11-NEXT: scratch_load_u16 v94, off, s32 offset:312 +; GFX11-NEXT: scratch_load_u16 v57, off, s32 offset:308 +; GFX11-NEXT: scratch_load_u16 v58, off, s32 offset:300 +; GFX11-NEXT: scratch_load_u16 v46, off, s32 offset:292 +; GFX11-NEXT: scratch_load_u16 v47, off, s32 offset:284 +; GFX11-NEXT: scratch_load_u16 v40, off, s32 offset:276 +; GFX11-NEXT: scratch_load_u16 v43, off, s32 offset:268 +; GFX11-NEXT: scratch_load_u16 v182, off, s32 offset:260 +; GFX11-NEXT: scratch_load_u16 v183, off, s32 offset:252 +; GFX11-NEXT: scratch_load_u16 v178, off, s32 offset:244 +; GFX11-NEXT: scratch_load_u16 v181, off, s32 offset:236 +; GFX11-NEXT: scratch_load_u16 v164, off, s32 offset:228 +; GFX11-NEXT: scratch_load_u16 v176, off, s32 offset:220 +; GFX11-NEXT: scratch_load_u16 v160, off, s32 offset:212 +; GFX11-NEXT: scratch_load_u16 v163, off, s32 offset:204 +; GFX11-NEXT: scratch_load_u16 v146, off, s32 offset:196 +; GFX11-NEXT: scratch_load_u16 v150, off, s32 offset:188 +; GFX11-NEXT: scratch_load_u16 v135, off, s32 offset:180 +; GFX11-NEXT: scratch_load_u16 v145, off, s32 offset:172 +; GFX11-NEXT: scratch_load_u16 v118, off, s32 offset:164 +; GFX11-NEXT: scratch_load_u16 v131, off, s32 offset:156 +; GFX11-NEXT: scratch_load_u16 v115, off, s32 offset:148 +; GFX11-NEXT: scratch_load_u16 v179, off, s32 offset:140 +; GFX11-NEXT: scratch_load_u16 v162, off, s32 offset:132 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v165, off, s32 offset:124 +; GFX11-NEXT: scratch_load_u16 v149, off, s32 offset:116 +; GFX11-NEXT: scratch_load_u16 v151, off, s32 offset:108 +; GFX11-NEXT: scratch_load_u16 v144, off, s32 offset:100 +; GFX11-NEXT: scratch_load_u16 v148, off, s32 offset:92 +; GFX11-NEXT: 
scratch_load_u16 v129, off, s32 offset:84 +; GFX11-NEXT: scratch_load_u16 v133, off, s32 offset:76 +; GFX11-NEXT: scratch_load_u16 v117, off, s32 offset:68 +; GFX11-NEXT: scratch_load_u16 v119, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v114, off, s32 offset:52 +; GFX11-NEXT: scratch_load_u16 v116, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v100, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v103, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v98, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v99, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v96, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v64, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v65, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v68, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v67, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v69, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v80, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v82, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v86, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(62) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v97, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 8, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v101, 8, v6 +; GFX11-NEXT: v_lshlrev_b32_e32 v102, 8, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v113, 8, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v112, 8, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v130, 8, v14 +; GFX11-NEXT: v_lshlrev_b32_e32 v128, 8, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v134, 8, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v132, 8, v20 +; GFX11-NEXT: v_lshlrev_b32_e32 v161, 8, v22 +; GFX11-NEXT: v_lshlrev_b32_e32 v147, 8, v24 +; GFX11-NEXT: v_lshlrev_b32_e32 v166, 8, v26 +; GFX11-NEXT: 
v_lshlrev_b32_e32 v167, 8, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v180, 8, v30 +; GFX11-NEXT: v_lshlrev_b32_e32 v177, 8, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v42, 8, v41 +; GFX11-NEXT: s_waitcnt vmcnt(61) +; GFX11-NEXT: v_lshlrev_b32_e32 v41, 8, v44 +; GFX11-NEXT: s_waitcnt vmcnt(60) +; GFX11-NEXT: v_lshlrev_b32_e32 v45, 8, v45 +; GFX11-NEXT: s_waitcnt vmcnt(59) +; GFX11-NEXT: v_lshlrev_b32_e32 v44, 8, v56 +; GFX11-NEXT: s_waitcnt vmcnt(58) +; GFX11-NEXT: v_lshlrev_b32_e32 v59, 8, v59 +; GFX11-NEXT: s_waitcnt vmcnt(57) +; GFX11-NEXT: v_lshlrev_b32_e32 v56, 8, v60 +; GFX11-NEXT: s_waitcnt vmcnt(56) +; GFX11-NEXT: v_lshlrev_b32_e32 v60, 8, v61 +; GFX11-NEXT: s_waitcnt vmcnt(55) +; GFX11-NEXT: v_lshlrev_b32_e32 v61, 8, v62 +; GFX11-NEXT: s_waitcnt vmcnt(54) +; GFX11-NEXT: v_lshlrev_b32_e32 v63, 8, v63 +; GFX11-NEXT: s_waitcnt vmcnt(53) +; GFX11-NEXT: v_lshlrev_b32_e32 v62, 8, v72 +; GFX11-NEXT: s_waitcnt vmcnt(52) +; GFX11-NEXT: v_lshlrev_b32_e32 v73, 8, v73 +; GFX11-NEXT: s_waitcnt vmcnt(51) +; GFX11-NEXT: v_lshlrev_b32_e32 v72, 8, v74 +; GFX11-NEXT: s_waitcnt vmcnt(50) +; GFX11-NEXT: v_lshlrev_b32_e32 v75, 8, v75 +; GFX11-NEXT: s_waitcnt vmcnt(49) +; GFX11-NEXT: v_lshlrev_b32_e32 v74, 8, v76 +; GFX11-NEXT: s_waitcnt vmcnt(48) +; GFX11-NEXT: v_lshlrev_b32_e32 v77, 8, v77 +; GFX11-NEXT: s_waitcnt vmcnt(47) +; GFX11-NEXT: v_lshlrev_b32_e32 v76, 8, v78 +; GFX11-NEXT: s_waitcnt vmcnt(46) +; GFX11-NEXT: v_lshlrev_b32_e32 v78, 8, v79 +; GFX11-NEXT: s_waitcnt vmcnt(45) +; GFX11-NEXT: v_lshlrev_b32_e32 v79, 8, v88 +; GFX11-NEXT: s_waitcnt vmcnt(44) +; GFX11-NEXT: v_lshlrev_b32_e32 v89, 8, v89 +; GFX11-NEXT: s_waitcnt vmcnt(43) +; GFX11-NEXT: v_lshlrev_b32_e32 v88, 8, v90 +; GFX11-NEXT: s_waitcnt vmcnt(42) +; GFX11-NEXT: v_lshlrev_b32_e32 v91, 8, v91 +; GFX11-NEXT: s_waitcnt vmcnt(41) +; GFX11-NEXT: v_lshlrev_b32_e32 v90, 8, v92 +; GFX11-NEXT: s_waitcnt vmcnt(40) +; GFX11-NEXT: v_lshlrev_b32_e32 v92, 8, v93 +; GFX11-NEXT: s_waitcnt vmcnt(39) +; GFX11-NEXT: v_lshlrev_b32_e32 
v93, 8, v94 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB89_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: v_and_b32_e64 v5, 0xffff, s5 +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_pack_ll_b32_b16 s5, s5, s6 +; GFX11-NEXT: s_pack_ll_b32_b16 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_and_b32 s11, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: s_pack_ll_b32_b16 s7, s7, s8 +; GFX11-NEXT: s_pack_ll_b32_b16 s8, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v32 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v68 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v64 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v66 +; GFX11-NEXT: v_or_b32_e32 v6, v4, v67 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v65 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshl_or_b32 v4, v0, 16, v5 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v38 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v39 +; 
GFX11-NEXT: v_lshl_or_b32 v6, v6, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v49 +; GFX11-NEXT: v_lshl_or_b32 v5, v3, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v37 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v70 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v50 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v71 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v48 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v69 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v82 +; GFX11-NEXT: v_or_b32_e32 v9, v7, v80 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_or_b32_e32 v10, v8, v81 +; GFX11-NEXT: v_lshl_or_b32 v7, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v53 +; GFX11-NEXT: v_lshl_or_b32 v8, v9, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v55 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v51 +; GFX11-NEXT: v_lshl_or_b32 v9, v10, 16, v3 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v84 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v52 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v54 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v86 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v83 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v96 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v85 +; GFX11-NEXT: v_or_b32_e32 v12, v10, v97 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v11, v87 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v99 +; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 +; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v103 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v114 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v98 +; GFX11-NEXT: v_lshl_or_b32 v12, v0, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v100 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v113 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v101 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v116 +; GFX11-NEXT: v_or_b32_e32 v17, v14, v128 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v112 
+; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v117 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v102 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v130 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v133 +; GFX11-NEXT: v_or_b32_e32 v20, v14, v132 +; GFX11-NEXT: v_lshl_or_b32 v14, v0, 16, v3 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v148 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v119 +; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v129 +; GFX11-NEXT: v_or_b32_e32 v16, v16, v161 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v13 +; GFX11-NEXT: v_lshl_or_b32 v13, v2, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v166 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v144 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v134 +; GFX11-NEXT: v_or_b32_e32 v18, v18, v147 +; GFX11-NEXT: v_and_b32_e32 v22, 0xffff, v16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v167 +; GFX11-NEXT: v_and_b32_e32 v21, 0xffff, v15 +; GFX11-NEXT: v_lshl_or_b32 v15, v17, 16, v19 +; GFX11-NEXT: v_lshl_or_b32 v17, v18, 16, v22 +; GFX11-NEXT: v_mov_b32_e32 v2, s7 +; GFX11-NEXT: v_lshl_or_b32 v18, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v151 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v149 +; GFX11-NEXT: v_lshl_or_b32 v16, v20, 16, v21 +; GFX11-NEXT: v_mov_b32_e32 v3, s8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v180 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v177 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v19, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v165 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v162 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v42 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v41 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; 
GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v20, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v179 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v115 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v45 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v44 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v21, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v131 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v118 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v59 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v56 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v22, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v145 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v135 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v60 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v61 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v23, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v150 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v146 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v63 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v62 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v24, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v163 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v160 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: 
v_or_b32_e32 v0, v0, v73 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v72 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v25, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v176 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v164 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v75 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v74 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v26, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v181 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v178 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v77 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v76 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v27, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v183 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v182 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v78 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v79 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v28, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v43 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v40 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v89 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v88 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v29, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v47 +; 
GFX11-NEXT: v_and_b32_e32 v1, 0xff, v46 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v91 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v90 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v30, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v58 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v57 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v92 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v93 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v31, v1, 16, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB89_3 +; GFX11-NEXT: .LBB89_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: s_lshl_b32 s5, s29, 8 +; GFX11-NEXT: s_and_b32 s4, s28, 0xff +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s5, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s25, 8 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: s_and_b32 s6, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s27, 8 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_or_b32 s6, s7, s6 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s7, s8, s7 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s8, s9, s8 +; GFX11-NEXT: s_and_b32 s9, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_or_b32 s9, s10, s9 +; 
GFX11-NEXT: s_and_b32 s10, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s11, s19, 8 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s10, s11, s10 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s9, 0x300 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_addk_i32 s10, 0x300 +; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX11-NEXT: s_pack_ll_b32_b16 s1, s9, s10 +; GFX11-NEXT: s_waitcnt vmcnt(37) +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v58 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: s_addk_i32 s6, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v57 +; GFX11-NEXT: s_pack_ll_b32_b16 s3, s5, s6 +; GFX11-NEXT: s_waitcnt vmcnt(35) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v47 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: s_addk_i32 s7, 0x300 +; GFX11-NEXT: s_addk_i32 s8, 0x300 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v92, v0 +; GFX11-NEXT: s_pack_ll_b32_b16 s2, s7, s8 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v46 +; GFX11-NEXT: v_or_b32_e32 v1, v93, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v91, v2 +; GFX11-NEXT: s_waitcnt vmcnt(33) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v43 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v0 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v40 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 0x300, v2 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v4 +; GFX11-NEXT: v_or_b32_e32 v3, v90, v3 +; GFX11-NEXT: s_waitcnt vmcnt(31) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v183 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v182 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v89, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 0x300, v3 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 
0xff, v4 +; GFX11-NEXT: s_waitcnt vmcnt(29) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v181 +; GFX11-NEXT: v_or_b32_e32 v0, v88, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v181, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v78, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v79, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v29, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v178 +; GFX11-NEXT: v_add_nc_u32_e32 v182, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v178, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v77, v3 +; GFX11-NEXT: s_waitcnt vmcnt(27) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v176 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v164 +; GFX11-NEXT: s_waitcnt vmcnt(25) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v163 +; GFX11-NEXT: v_add_nc_u32_e32 v163, 0x300, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v76, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v160 +; GFX11-NEXT: v_or_b32_e32 v1, v75, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v160, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v74, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v73, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v1 +; GFX11-NEXT: s_waitcnt vmcnt(23) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v150 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v72, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v146 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: s_waitcnt vmcnt(21) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v145 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v135 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 0x300, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v63, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: s_waitcnt vmcnt(19) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v131 
+; GFX11-NEXT: v_or_b32_e32 v0, v62, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v131, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v60, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v61, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v24, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v118 +; GFX11-NEXT: v_add_nc_u32_e32 v135, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v118, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v59, v3 +; GFX11-NEXT: s_waitcnt vmcnt(17) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v179 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v115 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v165 +; GFX11-NEXT: v_add_nc_u32_e32 v115, 0x300, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v56, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v162 +; GFX11-NEXT: v_or_b32_e32 v1, v45, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v145, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v44, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v42, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v1 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v151 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v41, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v149 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v148 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v144 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v180, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v133 +; GFX11-NEXT: v_or_b32_e32 v0, v177, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v19, 0x300, v1 
+; GFX11-NEXT: v_or_b32_e32 v1, v166, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v167, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v133, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v129 +; GFX11-NEXT: v_add_nc_u32_e32 v129, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v144, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v161, v3 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v119 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v117 +; GFX11-NEXT: s_waitcnt vmcnt(5) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v116 +; GFX11-NEXT: v_add_nc_u32_e32 v116, 0x300, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v147, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v114 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v99 +; GFX11-NEXT: v_add_nc_u32_e32 v114, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v132, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v130, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v103 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v98 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v54 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v39 +; GFX11-NEXT: v_add_nc_u32_e32 v33, 3, v33 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v4, v113, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v128, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v100 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v4, v101, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v102, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v96 +; 
GFX11-NEXT: v_or_b32_e32 v1, v134, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v4, v97, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v55 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v52 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v87, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v51 +; GFX11-NEXT: v_or_b32_e32 v4, v86, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v85, v6 +; GFX11-NEXT: v_or_b32_e32 v6, v84, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v51, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v50, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v49 +; GFX11-NEXT: v_or_b32_e32 v5, v83, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v48 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v82, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v38 +; GFX11-NEXT: v_or_b32_e32 v5, v81, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v38, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v4, v71, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v80, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v37 +; GFX11-NEXT: v_add_nc_u32_e32 v37, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v39, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v70, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v36 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: 
v_add_nc_u32_e32 v8, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v34, 3, v35 +; GFX11-NEXT: v_add_nc_u32_e32 v35, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v7 +; GFX11-NEXT: v_or_b32_e32 v5, v69, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v34 +; GFX11-NEXT: v_or_b32_e32 v3, v112, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v68, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v34, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v67, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v66, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v33 +; GFX11-NEXT: v_add_nc_u32_e32 v33, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v32 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v65, v8 +; GFX11-NEXT: v_and_b32_e64 v8, 0xffff, s4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v36, 0xffff, v6 +; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v64, v4 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v14 +; GFX11-NEXT: v_lshl_or_b32 v10, v10, 16, v36 +; GFX11-NEXT: v_lshl_or_b32 v5, v7, 16, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v33 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v37 +; GFX11-NEXT: v_and_b32_e32 v36, 0xffff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_lshl_or_b32 v6, v32, 16, v7 +; GFX11-NEXT: v_lshl_or_b32 v4, v4, 16, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v35 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v51 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v38 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v19 +; GFX11-NEXT: v_lshl_or_b32 v7, v34, 16, v8 +; GFX11-NEXT: v_lshl_or_b32 v8, v39, 16, v33 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v12 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v11 +; GFX11-NEXT: v_lshl_or_b32 v11, v50, 16, v32 +; GFX11-NEXT: 
v_and_b32_e32 v32, 0xffff, v1 +; GFX11-NEXT: v_lshl_or_b32 v9, v9, 16, v35 +; GFX11-NEXT: v_lshl_or_b32 v12, v15, 16, v14 +; GFX11-NEXT: v_lshl_or_b32 v13, v13, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v14, v3, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v16, v16, 16, v32 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v116 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v129 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v18 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v17 +; GFX11-NEXT: v_lshl_or_b32 v26, v26, 16, v36 +; GFX11-NEXT: v_lshl_or_b32 v17, v114, 16, v32 +; GFX11-NEXT: v_lshl_or_b32 v18, v144, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v20, v20, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v21, v21, 16, v35 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v115 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v135 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v131 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v23 +; GFX11-NEXT: v_and_b32_e32 v36, 0xffff, v27 +; GFX11-NEXT: v_lshl_or_b32 v22, v145, 16, v32 +; GFX11-NEXT: v_lshl_or_b32 v23, v118, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v24, v24, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v25, v25, 16, v35 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v163 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v182 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v181 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v28 +; GFX11-NEXT: v_lshl_or_b32 v15, v2, 16, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_lshl_or_b32 v19, v133, 16, v19 +; GFX11-NEXT: v_lshl_or_b32 v27, v160, 16, v32 +; GFX11-NEXT: v_lshl_or_b32 v28, v178, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v29, v29, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v30, v30, 16, v35 +; GFX11-NEXT: v_lshl_or_b32 v31, v31, 16, v36 +; GFX11-NEXT: .LBB89_3: ; %end +; GFX11-NEXT: s_clause 0x1e +; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:320 +; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:324 +; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:328 +; GFX11-NEXT: 
scratch_load_b32 v91, off, s32 offset:332 +; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:336 +; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:340 +; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:344 +; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:348 +; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:352 +; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:356 +; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:360 +; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:364 +; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:368 +; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:372 +; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:376 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:380 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:384 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:388 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:392 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:396 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:400 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:404 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:408 +; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:412 +; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:416 +; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:420 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:424 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:428 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:432 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:436 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:440 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB89_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-NEXT: s_branch .LBB89_2 %cmp = icmp 
eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -173855,766 +169625,814 @@ define <64 x half> @bitcast_v128i8_to_v64f16(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v150, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v150, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v147, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v149, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v149, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v148, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v148, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v147, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v146, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 
v146, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v145, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v134, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v144, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:260 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, 
s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:392 ; 4-byte Folded Spill +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_u16 v31, off, s32 offset:384 +; GFX11-TRUE16-NEXT: scratch_load_u16 v114, off, s32 offset:380 +; GFX11-TRUE16-NEXT: scratch_load_u16 v103, off, s32 offset:376 +; GFX11-TRUE16-NEXT: scratch_load_u16 v128, off, s32 offset:372 +; GFX11-TRUE16-NEXT: scratch_load_u16 v56, off, s32 offset:368 +; GFX11-TRUE16-NEXT: scratch_load_u16 v117, off, s32 offset:364 +; GFX11-TRUE16-NEXT: scratch_load_u16 v102, off, s32 offset:360 +; GFX11-TRUE16-NEXT: scratch_load_u16 v133, off, s32 offset:356 +; GFX11-TRUE16-NEXT: scratch_load_u16 v57, off, s32 offset:352 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:348 +; GFX11-TRUE16-NEXT: scratch_load_u16 v58, off, s32 offset:344 +; GFX11-TRUE16-NEXT: scratch_load_u16 v132, off, s32 offset:340 +; GFX11-TRUE16-NEXT: scratch_load_u16 v101, off, s32 offset:336 +; GFX11-TRUE16-NEXT: scratch_load_u16 v118, off, s32 offset:332 +; GFX11-TRUE16-NEXT: scratch_load_u16 v59, off, s32 offset:328 +; GFX11-TRUE16-NEXT: scratch_load_u16 v134, off, s32 offset:324 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:320 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:316 +; GFX11-TRUE16-NEXT: scratch_load_u16 v100, off, s32 offset:312 +; GFX11-TRUE16-NEXT: scratch_load_u16 v145, 
off, s32 offset:308 +; GFX11-TRUE16-NEXT: scratch_load_u16 v99, off, s32 offset:304 +; GFX11-TRUE16-NEXT: scratch_load_u16 v119, off, s32 offset:300 +; GFX11-TRUE16-NEXT: scratch_load_u16 v60, off, s32 offset:296 +; GFX11-TRUE16-NEXT: scratch_load_u16 v135, off, s32 offset:292 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:288 +; GFX11-TRUE16-NEXT: scratch_load_u16 v129, off, s32 offset:284 +; GFX11-TRUE16-NEXT: scratch_load_u16 v61, off, s32 offset:280 +; GFX11-TRUE16-NEXT: scratch_load_u16 v146, off, s32 offset:276 +; GFX11-TRUE16-NEXT: scratch_load_u16 v62, off, s32 offset:272 +; GFX11-TRUE16-NEXT: scratch_load_u16 v115, off, s32 offset:268 +; GFX11-TRUE16-NEXT: scratch_load_u16 v63, off, s32 offset:264 +; GFX11-TRUE16-NEXT: scratch_load_u16 v144, off, s32 offset:260 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v144, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v135, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v132, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v135, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v134, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v133, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v160, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v103, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v113, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v113, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:32 
-; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v114, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v115, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v116, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v116, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v117, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v117, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v118, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v119, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v119, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v128, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v128, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v129, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v130, off, s32 offset:152 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:256 +; GFX11-TRUE16-NEXT: scratch_load_u16 v130, off, s32 offset:252 +; GFX11-TRUE16-NEXT: scratch_load_u16 v72, off, s32 offset:248 +; GFX11-TRUE16-NEXT: scratch_load_u16 v147, off, s32 offset:244 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:240 +; GFX11-TRUE16-NEXT: scratch_load_u16 v116, off, s32 offset:236 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 offset:232 +; GFX11-TRUE16-NEXT: scratch_load_u16 v148, off, s32 offset:228 +; GFX11-TRUE16-NEXT: scratch_load_u16 v86, off, s32 offset:224 +; GFX11-TRUE16-NEXT: scratch_load_u16 v131, off, s32 offset:220 +; GFX11-TRUE16-NEXT: scratch_load_u16 v73, off, s32 offset:216 +; GFX11-TRUE16-NEXT: scratch_load_b32 v65, off, s32 offset:388 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 
offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v70, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_u16 v74, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v75, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v76, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v77, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v78, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v79, off, s32 offset:136 +; GFX11-TRUE16-NEXT: scratch_load_u16 v88, off, s32 offset:144 +; GFX11-TRUE16-NEXT: scratch_load_u16 v89, off, s32 offset:152 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v130, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v131, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v132, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v133, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v151, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v151, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v100, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v85, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:180 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v86, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v87, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v80, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v86, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v96, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v84, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v83, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v90, off, s32 offset:160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v91, off, s32 offset:168 +; GFX11-TRUE16-NEXT: scratch_load_u16 v92, off, s32 offset:176 +; GFX11-TRUE16-NEXT: scratch_load_u16 v93, off, s32 offset:184 +; GFX11-TRUE16-NEXT: scratch_load_u16 v94, off, s32 offset:192 +; GFX11-TRUE16-NEXT: scratch_load_u16 v95, off, s32 offset:200 +; GFX11-TRUE16-NEXT: scratch_load_u16 v104, off, s32 offset:208 +; GFX11-TRUE16-NEXT: scratch_load_u16 v179, off, s32 offset:212 +; GFX11-TRUE16-NEXT: scratch_load_u16 v163, off, s32 offset:204 +; GFX11-TRUE16-NEXT: scratch_load_u16 v183, off, s32 
offset:196 +; GFX11-TRUE16-NEXT: scratch_load_u16 v150, off, s32 offset:188 +; GFX11-TRUE16-NEXT: scratch_load_u16 v167, off, s32 offset:180 +; GFX11-TRUE16-NEXT: scratch_load_u16 v161, off, s32 offset:172 +; GFX11-TRUE16-NEXT: scratch_load_u16 v177, off, s32 offset:164 +; GFX11-TRUE16-NEXT: scratch_load_u16 v149, off, s32 offset:156 +; GFX11-TRUE16-NEXT: scratch_load_u16 v180, off, s32 offset:148 +; GFX11-TRUE16-NEXT: scratch_load_u16 v151, off, s32 offset:140 +; GFX11-TRUE16-NEXT: scratch_load_u16 v164, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v41, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v47, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v165, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v43, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v181, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v45, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v162, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v46, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v176, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v42, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v178, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v44, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v160, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v182, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v54.l, v30.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v67.l, v28.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.h, v26.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v65.h, v24.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v65.l, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.h, v16.l -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.h, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.l, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v54.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.h, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v19.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.h, 8, v21.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v23.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v25.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v27.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.l, 8, v29.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.h, 8, v150.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.l, 8, v150.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.h, 8, v147.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v149.h, 8, v149.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v149.l, 8, v149.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.h, 8, v148.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v145.l, 8, v145.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.l, 8, v148.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.l, 8, v147.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v146.h, 8, v146.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v146.l, 8, v146.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v145.h, 8, v145.h -; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v134.h, 8, v134.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.h, 8, v144.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.l, 8, v144.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.h, 8, v135.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.l, 8, v132.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.l, 8, v135.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v134.l, 8, v134.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.h, 8, v133.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v166, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v40, off, s32 offset:4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v30.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v28.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v26.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v24.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.h, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 
v52.l, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v19.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v21.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v23.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v25.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v27.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v29.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v31.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v103.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v56.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v102.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v57.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.h, 8, v58.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.l, 8, v101.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.h, 8, v103.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.l, 8, v113.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.h, 8, v113.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v114.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v115.l, 8, v114.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v115.h, 8, v115.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.l, 8, v116.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.h, 8, v116.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.l, 8, v117.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.l, 8, v117.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v59.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v97.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.l, 8, v100.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v99.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v60.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.l, 8, v98.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v61.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v86.h, 8, v62.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.h, 8, v63.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.l, 8, v96.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v72.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.l, 8, v84.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v87.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v86.l, 8, v86.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.l, 8, v118.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v73.l +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v65 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v53.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v66.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.l, 8, v67.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.h, 8, v68.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v69.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.l, 8, v70.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v71.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v80.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.h, 8, v81.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.l, 8, v82.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v83.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v85.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.h, 8, v118.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.h, 8, v74.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v119.l, 8, v119.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.l, 8, v75.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.h, 8, v119.h +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v76.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(38) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v119.h, 8, v128.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.h, 8, v77.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.l, 8, v128.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.l, 8, v78.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(36) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.h, 8, v129.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.h, 8, v79.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.l, 8, v129.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v88.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(34) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v130.l, 8, v130.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.l, 8, v89.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.h, 8, v130.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.h, 8, v90.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v130.h, 8, v131.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.h, 8, v91.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.l, 8, v131.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v92.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(30) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.h, 8, v132.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v93.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.h, 8, v133.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v94.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(28) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.h, 8, v151.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.h, 8, v95.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.l, 8, v151.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.l, 8, v31.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.h, 8, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v104.l ; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB92_3 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow -; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB92_4 -; GFX11-TRUE16-NEXT: .LBB92_2: ; %end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB92_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v52.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v51.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v53.l -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v51.l -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v54.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v55.l -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v53.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v52.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v65.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v64.l -; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v65.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v67.l -; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v54.l +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB92_2 +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v34.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v33.l +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v34.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v36.l +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 
v3.h, 0xff, v35.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v37.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v38.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v37.h +; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v39.l +; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v38.l +; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v35.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v67.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v66.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v68.l -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v66.l -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v70.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v68.h -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v71.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v69.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v83.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v69.l -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v84.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v82.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v85.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v80.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v96.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v86.h -; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v84.l -; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v80.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v87.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v71.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v96.l -; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v86.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v97.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v85.h -; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v100.l -; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v98.l -; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v100.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v32.l -; 
GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v48.l -; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v50.l -; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v50.h -; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v39.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v81.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v81.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v82.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v1.h, v83.l -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v2.l, v97.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v70.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v3.l, v98.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v99.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v99.h -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v87.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v101.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v102.l -; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v102.h -; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v103.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v112.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v101.l -; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v112.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v113.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v113.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v103.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v115.l -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, 
v10.h, v115.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v116.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v116.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v117.l -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v114.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v118.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v118.h -; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v119.l -; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v114.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v119.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v128.l -; GFX11-TRUE16-NEXT: v_or_b16 v16.l, v16.l, v128.h -; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v129.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v17.l, v130.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v117.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.l, v18.l, v130.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v131.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v19.l, v131.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v129.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.l, v20.l, v132.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v133.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v21.l, v133.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v134.l -; GFX11-TRUE16-NEXT: v_or_b16 v22.l, v22.l, v135.l -; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v132.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v23.l, v135.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v144.l -; GFX11-TRUE16-NEXT: v_or_b16 v24.l, v24.l, v144.h -; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v134.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v25.l, v145.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v146.l -; GFX11-TRUE16-NEXT: v_or_b16 v26.l, v26.l, v146.h -; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v147.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v27.l, v148.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v145.l -; GFX11-TRUE16-NEXT: v_or_b16 v28.l, v28.l, v148.h -; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v149.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v29.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v147.h -; 
GFX11-TRUE16-NEXT: v_or_b16 v30.l, v30.l, v150.l -; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v150.h -; GFX11-TRUE16-NEXT: v_or_b16 v31.l, v31.l, v151.l -; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v151.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v40.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v166.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v182.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v160.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v44.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v178.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v42.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v176.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v46.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v162.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v45.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v181.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v43.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v165.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v47.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v41.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v164.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v151.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v180.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v149.l +; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v177.l +; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v161.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v167.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v150.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v183.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v163.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v179.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v131.l +; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v148.l +; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v116.l +; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v147.l +; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 
0xff, v130.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v144.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v115.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v146.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v129.l +; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v135.l +; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v119.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v145.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v113.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v134.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v118.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v132.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v112.l +; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v133.l +; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v117.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v128.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v114.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v48.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v49.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v49.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v1.h, v50.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v2.l, v51.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v48.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v3.l, v51.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v52.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v52.h +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v50.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v53.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v54.l +; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v54.h +; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v55.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v64.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v53.l +; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v65.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v65.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v55.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v67.l +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v67.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v68.l +; GFX11-TRUE16-NEXT: 
v_or_b16 v11.h, v11.h, v68.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v69.l +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v66.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v70.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v70.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v71.l +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v66.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v71.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v80.l +; GFX11-TRUE16-NEXT: v_or_b16 v16.l, v16.l, v80.h +; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v81.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v17.l, v82.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v69.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.l, v18.l, v82.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v83.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v19.l, v83.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v81.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.l, v20.l, v84.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v85.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v21.l, v85.h +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v86.l +; GFX11-TRUE16-NEXT: v_or_b16 v22.l, v22.l, v87.l +; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v84.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v23.l, v87.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v96.l +; GFX11-TRUE16-NEXT: v_or_b16 v24.l, v24.l, v96.h +; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v86.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v25.l, v97.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v98.l +; GFX11-TRUE16-NEXT: v_or_b16 v26.l, v26.l, v98.h +; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v99.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v27.l, v100.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v97.l +; GFX11-TRUE16-NEXT: v_or_b16 v28.l, v28.l, v100.h +; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v101.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v29.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v99.h +; GFX11-TRUE16-NEXT: v_or_b16 v30.l, v30.l, v102.l +; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v102.h +; GFX11-TRUE16-NEXT: 
v_or_b16 v31.l, v31.l, v103.l +; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v103.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr179 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr163 +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr183 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr167 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr161 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr177 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr180 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr164 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr41 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr47 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr165 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr43 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr181 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr162 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr46 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr176 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr42 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr178 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr44 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr160 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr182 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr40 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: 
; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr86_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr83_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_lo16 -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr130_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_hi16 +; GFX11-TRUE16-NEXT: .LBB92_2: ; %Flow ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB92_2 -; GFX11-TRUE16-NEXT: .LBB92_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v50.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v39.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v50.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v39.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v48.l, 3 +; GFX11-TRUE16-NEXT: 
s_cbranch_execz .LBB92_4 +; GFX11-TRUE16-NEXT: ; %bb.3: ; %cmp.true +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v128.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v114.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v133.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v145.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v151.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v151.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v150.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v150.h, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v148.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v103.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v103.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v102.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v102.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v100.l, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v49.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v38.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v132.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v134.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v118.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.l, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v36.l, 3 -; GFX11-TRUE16-NEXT: 
v_or_b16 v0.l, v149.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v147.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v148.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v149.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v129.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v101.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v99.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v100.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v101.l, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v34.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v38.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v37.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v146.l, v2.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v113.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v135.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v119.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v146.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v98.l, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.h, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v145.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v146.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v147.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v145.h, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v34.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v97.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v98.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v99.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v97.h, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v148.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.h, 0x300, v0.l ; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.l, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v35.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v144.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v147.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v130.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v135.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v144.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v134.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v135.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v144.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v87.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v96.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v86.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v87.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v96.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v116.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(26) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v100.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v179.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v131.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(24) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v100.l, 3 
-; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v98.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v183.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v163.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v132.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v133.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v134.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v132.h, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v133.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v84.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v85.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v86.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v84.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v85.l, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(22) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v97.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v85.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v167.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v150.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v96.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v86.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v177.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v161.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v87.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v131.h, v0.l -; GFX11-TRUE16-NEXT: 
v_or_b16 v0.h, v129.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v130.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v131.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v180.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v83.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v81.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v82.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v83.l, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v71.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v149.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v84.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v80.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v164.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v151.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v96.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v130.l, v2.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v47.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v82.l, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v117.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v128.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v129.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v119.h, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v86.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v69.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v80.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v81.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v71.h, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v41.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 
v17.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v85.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v80.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v43.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v165.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v84.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v82.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v45.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v181.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v128.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v119.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v114.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v118.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v118.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v80.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v71.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v66.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v70.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v70.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v2.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v83.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v46.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v69.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v162.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v71.l, 3 -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v69.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v42.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v176.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v70.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v44.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v117.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v114.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v116.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v116.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v115.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v69.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v66.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v68.l, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v68.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v67.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v68.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v178.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v68.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v66.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v182.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v160.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v67.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v66.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v40.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v166.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 
v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v115.h, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v113.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v103.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v112.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v113.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v67.h, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v65.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v55.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v64.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v65.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v39.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v54.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v65.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v64.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v35.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v39.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v38.h, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v112.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v101.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v102.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v103.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v101.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v64.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v53.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, 
v54.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v55.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v64.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v37.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.l, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v52.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v55.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v53.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v35.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v102.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v99.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v87.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v98.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v99.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v54.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v52.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v50.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v51.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v52.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v54.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v36.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 
v0.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v51.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v52.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v51.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v34.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v34.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v97.h, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v70.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v82.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v83.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v81.l, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v81.h, v2.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v51.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v48.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v49.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v50.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v48.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v49.l, v2.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, 0x300, v32.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x300, v32.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, 0x300, v33.l +; GFX11-TRUE16-NEXT: .LBB92_4: ; %end ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:392 +; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:400 +; 
GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:520 ; 4-byte Folded Reload +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; 
GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v64f16: @@ -179289,1657 +175107,831 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: bitcast_v128i8_to_v64f16_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1e -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 
offset:348 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v22 :: v_dual_mov_b32 v48, v20 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v14 :: v_dual_mov_b32 v34, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v12 :: v_dual_mov_b32 v38, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v2 :: v_dual_mov_b32 v32, v0 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v2, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v16, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v18, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v20, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v22, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v24, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v26, off, s32 offset:96 
-; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v28, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v30, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v41, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v44, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v45, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v56, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v59, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v60, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v61, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v62, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v63, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v72, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v73, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v74, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v75, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v77, off, s32 offset:240 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v78, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v79, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v88, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v89, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v90, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v91, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v92, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v93, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v94, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v57, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v58, off, s32 
offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v40, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v43, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v182, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v183, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v179, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v181, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v176, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v160, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v146, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v150, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v135, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v119, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v178, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:132 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v149, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v151, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v144, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v148, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v133, off, s32 offset:76 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v117, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v116, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v68, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v67, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v70, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v69, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v80, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v82, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v86, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v97, 8, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 8, v4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v101, 8, v6 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v102, 8, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v113, 8, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v112, 8, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v130, 8, v14 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v128, 8, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v134, 8, v18 -; GFX11-TRUE16-NEXT: 
v_lshlrev_b32_e32 v132, 8, v20 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v161, 8, v22 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v147, 8, v24 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v166, 8, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v167, 8, v28 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v180, 8, v30 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v177, 8, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v42, 8, v41 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v41, 8, v44 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v45, 8, v45 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v44, 8, v56 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v59, 8, v59 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v56, 8, v60 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v60, 8, v61 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v61, 8, v62 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v63, 8, v63 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v62, 8, v72 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v73, 8, v73 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v72, 8, v74 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v75, 8, v75 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v74, 8, v76 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v77, 8, v77 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v76, 8, v78 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v78, 8, v79 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v79, 8, v88 
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v89, 8, v89 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v88, 8, v90 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v91, 8, v91 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v90, 8, v92 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v92, 8, v93 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v93, 8, v94 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB93_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e64 v5, 0xffff, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-TRUE16-NEXT: s_or_b32 
s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v68 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v4, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v65 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v0, 16, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v39 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v6, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v49 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v50 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v48 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v7, v80 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v8, v81 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v53 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v9, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v55 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v51 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v10, 16, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v84 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v96 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v85 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v10, v97 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v11, v87 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v99 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v3, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v103 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v114 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v98 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v0, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v113 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v101 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v116 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v14, v128 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v112 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v117 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v102 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v130 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v133 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v14, v132 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v0, 16, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v148 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v118 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xff, v129 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v16, v161 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v2, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, 
v166 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v144 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v134 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v18, v147 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v167 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v17, 16, v19 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v17, v18, 16, v22 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s7 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v18, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v151 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v149 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v16, v20, 16, v21 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, s8 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v19, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v165 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v162 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v42 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v41 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v20, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v115 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v45 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v21, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v119 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v56 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v22, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v135 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v60 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v61 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v23, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v150 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v63 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v62 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v24, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v163 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v160 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v73 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v72 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 
0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v25, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v176 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v164 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v75 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v74 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v26, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v179 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v77 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v76 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v27, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v28, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v43 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v40 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v88 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v29, v1, 16, 
v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v46 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v91 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v90 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v30, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v58 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v57 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v92 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v93 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v31, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB93_3 -; GFX11-TRUE16-NEXT: .LBB93_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; 
GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v58 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v57 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v92, v0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v46 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v93, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v43 
-; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v40 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v90, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v183 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v89, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v181 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v88, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v181, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v78, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v79, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v29, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v179 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v182, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v179, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v77, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v176 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v164 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v163 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v163, 0x300, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v160 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v75, v1 -; 
GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v160, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v74, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v73, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v150 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v72, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v146 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v145 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v135 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v63, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v131 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v62, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v131, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v60, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v61, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v24, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v119 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v135, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v119, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v59, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v115 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v165 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v115, 0x300, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v56, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v162 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v45, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v145, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v44, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v42, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v151 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v41, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v149 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v148 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v144 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v180, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v133 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v177, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v166, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v167, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v133, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v129 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v129, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v144, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v161, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v118 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v3, 3, v117 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v116 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v116, 0x300, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v147, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v114 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v99 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v114, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v132, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v130, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v103 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v98 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v54 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v39 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 3, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v113, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v128, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v100 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v101, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v102, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v96 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v134, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v97, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, 
v55 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v87, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v51 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v86, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v85, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v84, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v51, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v50, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v49 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v83, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v82, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v38 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v81, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v38, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v71, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v80, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v37 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v39, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v70, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, 
v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 3, v35 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v69, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v34 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v112, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v68, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v67, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v66, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v32 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v65, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v36, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v10, 16, v36 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v36, 0xffff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v32, 16, v7 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v51 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v38 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v34, 16, v8 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v39, 16, v33 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v50, 16, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v9, 16, v35 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v15, 16, v14 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v13, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v3, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v16, v16, 16, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v116 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v129 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v26, v26, 16, v36 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v17, v114, 16, v32 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v18, v144, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v20, v20, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v21, v21, 16, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v115 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v135 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v36, 0xffff, v27 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v22, v145, 16, v32 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v23, v119, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v24, v24, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v25, v25, 16, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v163 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v181 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v28 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v2, 16, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v19, v133, 16, v19 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v27, v160, 16, v32 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v28, v179, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v29, v29, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v30, v30, 16, v35 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v31, v31, 16, v36 -; GFX11-TRUE16-NEXT: .LBB93_3: ; %end -; GFX11-TRUE16-NEXT: s_clause 0x1e -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 
offset:412 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:440 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB93_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: s_branch .LBB93_2 -; -; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v64f16_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x1e -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 
-; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v53, v22 :: v_dual_mov_b32 v48, v20 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v50, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v49, v14 :: v_dual_mov_b32 v34, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v37, v12 :: v_dual_mov_b32 v38, v10 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v35, v2 :: v_dual_mov_b32 v32, v0 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-FAKE16-NEXT: scratch_load_u16 v2, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 
v8, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v16, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v18, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_u16 v20, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_u16 v22, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_u16 v24, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_u16 v26, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_u16 v28, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_u16 v30, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_u16 v31, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_u16 v41, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_u16 v44, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_u16 v45, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_u16 v56, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_u16 v59, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_u16 v60, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_u16 v61, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_u16 v62, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_u16 v63, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_u16 v72, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_u16 v73, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_u16 v74, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_u16 v75, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_u16 v76, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_u16 v77, off, s32 offset:240 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_u16 v78, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_u16 v79, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_u16 v88, off, s32 offset:264 -; GFX11-FAKE16-NEXT: 
scratch_load_u16 v89, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_u16 v90, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_u16 v91, off, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_load_u16 v92, off, s32 offset:296 -; GFX11-FAKE16-NEXT: scratch_load_u16 v93, off, s32 offset:304 -; GFX11-FAKE16-NEXT: scratch_load_u16 v94, off, s32 offset:312 -; GFX11-FAKE16-NEXT: scratch_load_u16 v57, off, s32 offset:308 -; GFX11-FAKE16-NEXT: scratch_load_u16 v58, off, s32 offset:300 -; GFX11-FAKE16-NEXT: scratch_load_u16 v46, off, s32 offset:292 -; GFX11-FAKE16-NEXT: scratch_load_u16 v47, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_u16 v40, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_u16 v43, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_u16 v182, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_u16 v183, off, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_load_u16 v178, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_u16 v181, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_u16 v164, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_u16 v176, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_u16 v160, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_u16 v163, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_u16 v146, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_u16 v150, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_u16 v135, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_u16 v145, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_u16 v118, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_u16 v131, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_u16 v115, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_u16 v179, off, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_load_u16 v162, off, s32 offset:132 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v165, off, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_load_u16 v149, 
off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_u16 v151, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_u16 v144, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_u16 v148, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_u16 v129, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_u16 v133, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_u16 v117, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_u16 v119, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v114, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_u16 v116, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v100, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v103, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v98, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v99, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v96, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v64, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v65, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v68, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v69, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v80, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v81, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 8, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 8, v4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v101, 8, v6 -; GFX11-FAKE16-NEXT: 
v_lshlrev_b32_e32 v102, 8, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v113, 8, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v112, 8, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v130, 8, v14 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v128, 8, v16 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 8, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 8, v20 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v161, 8, v22 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v147, 8, v24 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v166, 8, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v167, 8, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v180, 8, v30 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v177, 8, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v42, 8, v41 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v41, 8, v44 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v45, 8, v45 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v44, 8, v56 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v59, 8, v59 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v56, 8, v60 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v60, 8, v61 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v61, 8, v62 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v63, 8, v63 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v62, 8, v72 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v73, 8, v73 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v72, 8, v74 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v75, 8, v75 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v74, 8, v76 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(48) -; 
GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v77, 8, v77 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v76, 8, v78 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v78, 8, v79 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v79, 8, v88 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v89, 8, v89 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v88, 8, v90 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v91, 8, v91 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v90, 8, v92 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v92, 8, v93 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v93, 8, v94 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB93_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e64 v5, 0xffff, s5 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; 
GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s8, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v68 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v4, v67 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v6, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v49 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v50 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v48 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v7, v80 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v8, v81 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v1, 16, v0 -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v53 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v9, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v55 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v51 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v10, 16, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v96 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v85 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v10, v97 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v11, v87 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v99 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v3, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v103 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v114 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v98 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v0, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v113 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v101 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v116 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v14, v128 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v112 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v117 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v102 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v130 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v133 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v14, v132 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v0, 16, v3 -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v0, 0xff, v148 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v119 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xff, v129 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v16, v161 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v2, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v166 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v144 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v134 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v18, v147 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v167 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v17, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v18, 16, v22 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, s7 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v151 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v149 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v20, 16, v21 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, s8 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v177 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v165 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v162 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v42 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v41 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 
v0, 0xff, v179 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v115 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v45 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v56 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v135 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v60 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v61 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v150 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v63 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v62 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v163 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 
0xff, v160 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v73 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v72 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v176 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v164 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v75 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v74 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v178 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v77 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v76 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v43 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v40 -; GFX11-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v88 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v46 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v91 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v90 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v58 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v57 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v92 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v93 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB93_3 -; GFX11-FAKE16-NEXT: .LBB93_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-FAKE16-NEXT: 
s_lshl_b32 s7, s27, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v58 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v57 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 
-; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v92, v0 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v46 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v93, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v43 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v40 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v90, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v183 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v89, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v181 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v88, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v181, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v78, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v79, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v29, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v178 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v182, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v178, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v77, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v176 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v164 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v163 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v163, 0x300, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v160 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v75, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v160, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v74, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v73, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v150 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v72, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v146 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v145 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v135 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v63, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v131 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v62, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v131, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v60, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v61, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v118 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v135, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v118, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v59, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 
v2, 3, v179 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v115 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v165 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v115, 0x300, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v56, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v162 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v45, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v145, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v44, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v42, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v151 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v41, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v149 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v148 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v144 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v180, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v133 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v177, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v166, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v167, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v133, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v129 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v129, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v144, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v161, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v119 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v117 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v116 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v116, 0x300, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v147, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v114 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v99 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v114, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v132, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v130, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v103 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v98 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v54 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v39 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 3, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v113, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v128, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v100 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v101, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v102, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: 
s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v96 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v134, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v97, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v55 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v87, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v51 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v86, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v85, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v84, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v51, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v50, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v49 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v83, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v82, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v81, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v71, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v80, v8 -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v70, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 3, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v69, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v112, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v68, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v67, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v66, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v65, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v10, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 
0xffff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v32, 16, v7 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v51 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v34, 16, v8 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v39, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v50, 16, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v9, 16, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v15, 16, v14 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v13, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v3, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v16, 16, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v116 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v129 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v26, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v114, 16, v32 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v144, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v20, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v21, 16, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v115 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v135 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v145, 16, v32 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v118, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v24, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v25, 16, v35 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v163 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v28 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v2, 16, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v133, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v160, 16, v32 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v178, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v29, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v30, 16, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v31, 16, v36 -; GFX11-FAKE16-NEXT: .LBB93_3: ; %end -; GFX11-FAKE16-NEXT: s_clause 0x1e -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:320 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, 
off, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:440 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB93_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: s_branch .LBB93_2 +; GFX11-LABEL: bitcast_v128i8_to_v64f16_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x1e +; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-NEXT: 
scratch_store_b32 off, v58, s32 offset:400 +; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:320 +; GFX11-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 +; GFX11-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 +; GFX11-NEXT: v_dual_mov_b32 v53, v22 :: v_dual_mov_b32 v48, v20 +; GFX11-NEXT: v_dual_mov_b32 v50, v18 :: v_dual_mov_b32 v39, v16 +; GFX11-NEXT: v_dual_mov_b32 v49, v14 :: v_dual_mov_b32 v34, v8 +; GFX11-NEXT: v_dual_mov_b32 v37, v12 :: v_dual_mov_b32 v38, v10 +; GFX11-NEXT: v_dual_mov_b32 v36, v6 :: v_dual_mov_b32 v33, v4 +; GFX11-NEXT: v_dual_mov_b32 v35, v2 :: v_dual_mov_b32 v32, v0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:316 +; GFX11-NEXT: scratch_load_u16 v2, off, s32 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 
v8, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v16, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v18, off, s32 offset:64 +; GFX11-NEXT: scratch_load_u16 v20, off, s32 offset:72 +; GFX11-NEXT: scratch_load_u16 v22, off, s32 offset:80 +; GFX11-NEXT: scratch_load_u16 v24, off, s32 offset:88 +; GFX11-NEXT: scratch_load_u16 v26, off, s32 offset:96 +; GFX11-NEXT: scratch_load_u16 v28, off, s32 offset:104 +; GFX11-NEXT: scratch_load_u16 v30, off, s32 offset:112 +; GFX11-NEXT: scratch_load_u16 v31, off, s32 offset:120 +; GFX11-NEXT: scratch_load_u16 v41, off, s32 offset:128 +; GFX11-NEXT: scratch_load_u16 v44, off, s32 offset:136 +; GFX11-NEXT: scratch_load_u16 v45, off, s32 offset:144 +; GFX11-NEXT: scratch_load_u16 v56, off, s32 offset:152 +; GFX11-NEXT: scratch_load_u16 v59, off, s32 offset:160 +; GFX11-NEXT: scratch_load_u16 v60, off, s32 offset:168 +; GFX11-NEXT: scratch_load_u16 v61, off, s32 offset:176 +; GFX11-NEXT: scratch_load_u16 v62, off, s32 offset:184 +; GFX11-NEXT: scratch_load_u16 v63, off, s32 offset:192 +; GFX11-NEXT: scratch_load_u16 v72, off, s32 offset:200 +; GFX11-NEXT: scratch_load_u16 v73, off, s32 offset:208 +; GFX11-NEXT: scratch_load_u16 v74, off, s32 offset:216 +; GFX11-NEXT: scratch_load_u16 v75, off, s32 offset:224 +; GFX11-NEXT: scratch_load_u16 v76, off, s32 offset:232 +; GFX11-NEXT: scratch_load_u16 v77, off, s32 offset:240 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_u16 v78, off, s32 offset:248 +; GFX11-NEXT: scratch_load_u16 v79, off, s32 offset:256 +; GFX11-NEXT: scratch_load_u16 v88, off, s32 offset:264 +; GFX11-NEXT: scratch_load_u16 v89, off, s32 offset:272 +; GFX11-NEXT: scratch_load_u16 v90, off, s32 offset:280 +; GFX11-NEXT: scratch_load_u16 v91, off, s32 offset:288 +; GFX11-NEXT: scratch_load_u16 v92, off, s32 offset:296 +; GFX11-NEXT: 
scratch_load_u16 v93, off, s32 offset:304 +; GFX11-NEXT: scratch_load_u16 v94, off, s32 offset:312 +; GFX11-NEXT: scratch_load_u16 v57, off, s32 offset:308 +; GFX11-NEXT: scratch_load_u16 v58, off, s32 offset:300 +; GFX11-NEXT: scratch_load_u16 v46, off, s32 offset:292 +; GFX11-NEXT: scratch_load_u16 v47, off, s32 offset:284 +; GFX11-NEXT: scratch_load_u16 v40, off, s32 offset:276 +; GFX11-NEXT: scratch_load_u16 v43, off, s32 offset:268 +; GFX11-NEXT: scratch_load_u16 v182, off, s32 offset:260 +; GFX11-NEXT: scratch_load_u16 v183, off, s32 offset:252 +; GFX11-NEXT: scratch_load_u16 v178, off, s32 offset:244 +; GFX11-NEXT: scratch_load_u16 v181, off, s32 offset:236 +; GFX11-NEXT: scratch_load_u16 v164, off, s32 offset:228 +; GFX11-NEXT: scratch_load_u16 v176, off, s32 offset:220 +; GFX11-NEXT: scratch_load_u16 v160, off, s32 offset:212 +; GFX11-NEXT: scratch_load_u16 v163, off, s32 offset:204 +; GFX11-NEXT: scratch_load_u16 v146, off, s32 offset:196 +; GFX11-NEXT: scratch_load_u16 v150, off, s32 offset:188 +; GFX11-NEXT: scratch_load_u16 v135, off, s32 offset:180 +; GFX11-NEXT: scratch_load_u16 v145, off, s32 offset:172 +; GFX11-NEXT: scratch_load_u16 v118, off, s32 offset:164 +; GFX11-NEXT: scratch_load_u16 v131, off, s32 offset:156 +; GFX11-NEXT: scratch_load_u16 v115, off, s32 offset:148 +; GFX11-NEXT: scratch_load_u16 v179, off, s32 offset:140 +; GFX11-NEXT: scratch_load_u16 v162, off, s32 offset:132 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v165, off, s32 offset:124 +; GFX11-NEXT: scratch_load_u16 v149, off, s32 offset:116 +; GFX11-NEXT: scratch_load_u16 v151, off, s32 offset:108 +; GFX11-NEXT: scratch_load_u16 v144, off, s32 offset:100 +; GFX11-NEXT: scratch_load_u16 v148, off, s32 offset:92 +; GFX11-NEXT: scratch_load_u16 v129, off, s32 offset:84 +; GFX11-NEXT: scratch_load_u16 v133, off, s32 offset:76 +; GFX11-NEXT: scratch_load_u16 v117, off, s32 offset:68 +; GFX11-NEXT: scratch_load_u16 v119, off, s32 offset:60 +; GFX11-NEXT: 
scratch_load_u16 v114, off, s32 offset:52 +; GFX11-NEXT: scratch_load_u16 v116, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v100, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v103, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v98, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v99, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v96, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v64, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v65, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v68, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v67, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v69, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v80, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v82, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v86, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(62) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v97, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 8, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v101, 8, v6 +; GFX11-NEXT: v_lshlrev_b32_e32 v102, 8, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v113, 8, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v112, 8, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v130, 8, v14 +; GFX11-NEXT: v_lshlrev_b32_e32 v128, 8, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v134, 8, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v132, 8, v20 +; GFX11-NEXT: v_lshlrev_b32_e32 v161, 8, v22 +; GFX11-NEXT: v_lshlrev_b32_e32 v147, 8, v24 +; GFX11-NEXT: v_lshlrev_b32_e32 v166, 8, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v167, 8, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v180, 8, v30 +; GFX11-NEXT: v_lshlrev_b32_e32 v177, 8, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v42, 8, v41 +; GFX11-NEXT: s_waitcnt vmcnt(61) +; GFX11-NEXT: 
v_lshlrev_b32_e32 v41, 8, v44 +; GFX11-NEXT: s_waitcnt vmcnt(60) +; GFX11-NEXT: v_lshlrev_b32_e32 v45, 8, v45 +; GFX11-NEXT: s_waitcnt vmcnt(59) +; GFX11-NEXT: v_lshlrev_b32_e32 v44, 8, v56 +; GFX11-NEXT: s_waitcnt vmcnt(58) +; GFX11-NEXT: v_lshlrev_b32_e32 v59, 8, v59 +; GFX11-NEXT: s_waitcnt vmcnt(57) +; GFX11-NEXT: v_lshlrev_b32_e32 v56, 8, v60 +; GFX11-NEXT: s_waitcnt vmcnt(56) +; GFX11-NEXT: v_lshlrev_b32_e32 v60, 8, v61 +; GFX11-NEXT: s_waitcnt vmcnt(55) +; GFX11-NEXT: v_lshlrev_b32_e32 v61, 8, v62 +; GFX11-NEXT: s_waitcnt vmcnt(54) +; GFX11-NEXT: v_lshlrev_b32_e32 v63, 8, v63 +; GFX11-NEXT: s_waitcnt vmcnt(53) +; GFX11-NEXT: v_lshlrev_b32_e32 v62, 8, v72 +; GFX11-NEXT: s_waitcnt vmcnt(52) +; GFX11-NEXT: v_lshlrev_b32_e32 v73, 8, v73 +; GFX11-NEXT: s_waitcnt vmcnt(51) +; GFX11-NEXT: v_lshlrev_b32_e32 v72, 8, v74 +; GFX11-NEXT: s_waitcnt vmcnt(50) +; GFX11-NEXT: v_lshlrev_b32_e32 v75, 8, v75 +; GFX11-NEXT: s_waitcnt vmcnt(49) +; GFX11-NEXT: v_lshlrev_b32_e32 v74, 8, v76 +; GFX11-NEXT: s_waitcnt vmcnt(48) +; GFX11-NEXT: v_lshlrev_b32_e32 v77, 8, v77 +; GFX11-NEXT: s_waitcnt vmcnt(47) +; GFX11-NEXT: v_lshlrev_b32_e32 v76, 8, v78 +; GFX11-NEXT: s_waitcnt vmcnt(46) +; GFX11-NEXT: v_lshlrev_b32_e32 v78, 8, v79 +; GFX11-NEXT: s_waitcnt vmcnt(45) +; GFX11-NEXT: v_lshlrev_b32_e32 v79, 8, v88 +; GFX11-NEXT: s_waitcnt vmcnt(44) +; GFX11-NEXT: v_lshlrev_b32_e32 v89, 8, v89 +; GFX11-NEXT: s_waitcnt vmcnt(43) +; GFX11-NEXT: v_lshlrev_b32_e32 v88, 8, v90 +; GFX11-NEXT: s_waitcnt vmcnt(42) +; GFX11-NEXT: v_lshlrev_b32_e32 v91, 8, v91 +; GFX11-NEXT: s_waitcnt vmcnt(41) +; GFX11-NEXT: v_lshlrev_b32_e32 v90, 8, v92 +; GFX11-NEXT: s_waitcnt vmcnt(40) +; GFX11-NEXT: v_lshlrev_b32_e32 v92, 8, v93 +; GFX11-NEXT: s_waitcnt vmcnt(39) +; GFX11-NEXT: v_lshlrev_b32_e32 v93, 8, v94 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB93_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; 
GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: v_and_b32_e64 v5, 0xffff, s5 +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_pack_ll_b32_b16 s5, s5, s6 +; GFX11-NEXT: s_pack_ll_b32_b16 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_and_b32 s11, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: s_pack_ll_b32_b16 s7, s7, s8 +; GFX11-NEXT: s_pack_ll_b32_b16 s8, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v32 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v68 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v64 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v66 +; GFX11-NEXT: v_or_b32_e32 v6, v4, v67 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v65 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshl_or_b32 v4, v0, 16, v5 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v38 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v39 +; GFX11-NEXT: v_lshl_or_b32 v6, v6, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v49 +; GFX11-NEXT: v_lshl_or_b32 v5, v3, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v37 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v70 +; 
GFX11-NEXT: v_and_b32_e32 v3, 0xff, v50 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v71 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v48 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v69 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v82 +; GFX11-NEXT: v_or_b32_e32 v9, v7, v80 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_or_b32_e32 v10, v8, v81 +; GFX11-NEXT: v_lshl_or_b32 v7, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v53 +; GFX11-NEXT: v_lshl_or_b32 v8, v9, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v55 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v51 +; GFX11-NEXT: v_lshl_or_b32 v9, v10, 16, v3 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v84 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v52 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v54 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v86 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v83 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v96 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v85 +; GFX11-NEXT: v_or_b32_e32 v12, v10, v97 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v11, v87 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v99 +; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 +; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v103 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v114 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v98 +; GFX11-NEXT: v_lshl_or_b32 v12, v0, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v100 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v113 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v101 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v116 +; GFX11-NEXT: v_or_b32_e32 v17, v14, v128 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v112 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v117 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v102 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_or_b32_e32 v13, v13, 
v130 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v133 +; GFX11-NEXT: v_or_b32_e32 v20, v14, v132 +; GFX11-NEXT: v_lshl_or_b32 v14, v0, 16, v3 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v148 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v119 +; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v129 +; GFX11-NEXT: v_or_b32_e32 v16, v16, v161 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v13 +; GFX11-NEXT: v_lshl_or_b32 v13, v2, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v166 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v144 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v134 +; GFX11-NEXT: v_or_b32_e32 v18, v18, v147 +; GFX11-NEXT: v_and_b32_e32 v22, 0xffff, v16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v167 +; GFX11-NEXT: v_and_b32_e32 v21, 0xffff, v15 +; GFX11-NEXT: v_lshl_or_b32 v15, v17, 16, v19 +; GFX11-NEXT: v_lshl_or_b32 v17, v18, 16, v22 +; GFX11-NEXT: v_mov_b32_e32 v2, s7 +; GFX11-NEXT: v_lshl_or_b32 v18, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v151 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v149 +; GFX11-NEXT: v_lshl_or_b32 v16, v20, 16, v21 +; GFX11-NEXT: v_mov_b32_e32 v3, s8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v180 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v177 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v19, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v165 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v162 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v42 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v41 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v20, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v179 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v115 +; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v45 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v44 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v21, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v131 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v118 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v59 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v56 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v22, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v145 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v135 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v60 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v61 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v23, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v150 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v146 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v63 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v62 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v24, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v163 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v160 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v73 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v72 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: 
v_lshl_or_b32 v25, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v176 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v164 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v75 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v74 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v26, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v181 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v178 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v77 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v76 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v27, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v183 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v182 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v78 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v79 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v28, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v43 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v40 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v89 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v88 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v29, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v47 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v46 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v91 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v90 +; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v30, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v58 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v57 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v92 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v93 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v31, v1, 16, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB93_3 +; GFX11-NEXT: .LBB93_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: s_lshl_b32 s5, s29, 8 +; GFX11-NEXT: s_and_b32 s4, s28, 0xff +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s5, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s25, 8 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: s_and_b32 s6, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s27, 8 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_or_b32 s6, s7, s6 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s7, s8, s7 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s8, s9, s8 +; GFX11-NEXT: s_and_b32 s9, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_or_b32 s9, s10, s9 +; GFX11-NEXT: s_and_b32 s10, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s11, s19, 8 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: 
s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s10, s11, s10 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s9, 0x300 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_addk_i32 s10, 0x300 +; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX11-NEXT: s_pack_ll_b32_b16 s1, s9, s10 +; GFX11-NEXT: s_waitcnt vmcnt(37) +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v58 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: s_addk_i32 s6, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v57 +; GFX11-NEXT: s_pack_ll_b32_b16 s3, s5, s6 +; GFX11-NEXT: s_waitcnt vmcnt(35) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v47 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: s_addk_i32 s7, 0x300 +; GFX11-NEXT: s_addk_i32 s8, 0x300 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v92, v0 +; GFX11-NEXT: s_pack_ll_b32_b16 s2, s7, s8 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v46 +; GFX11-NEXT: v_or_b32_e32 v1, v93, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v91, v2 +; GFX11-NEXT: s_waitcnt vmcnt(33) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v43 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v0 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v40 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 0x300, v2 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v4 +; GFX11-NEXT: v_or_b32_e32 v3, v90, v3 +; GFX11-NEXT: s_waitcnt vmcnt(31) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v183 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v182 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v89, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 0x300, v3 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: s_waitcnt vmcnt(29) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v181 +; GFX11-NEXT: v_or_b32_e32 v0, v88, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v181, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 
v1, v78, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v79, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v29, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v178 +; GFX11-NEXT: v_add_nc_u32_e32 v182, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v178, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v77, v3 +; GFX11-NEXT: s_waitcnt vmcnt(27) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v176 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v164 +; GFX11-NEXT: s_waitcnt vmcnt(25) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v163 +; GFX11-NEXT: v_add_nc_u32_e32 v163, 0x300, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v76, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v160 +; GFX11-NEXT: v_or_b32_e32 v1, v75, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v160, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v74, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v73, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v1 +; GFX11-NEXT: s_waitcnt vmcnt(23) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v150 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v72, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v146 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: s_waitcnt vmcnt(21) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v145 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v135 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 0x300, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v63, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: s_waitcnt vmcnt(19) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v131 +; GFX11-NEXT: v_or_b32_e32 v0, v62, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v131, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v60, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v61, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 
0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v24, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v118 +; GFX11-NEXT: v_add_nc_u32_e32 v135, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v118, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v59, v3 +; GFX11-NEXT: s_waitcnt vmcnt(17) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v179 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v115 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v165 +; GFX11-NEXT: v_add_nc_u32_e32 v115, 0x300, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v56, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v162 +; GFX11-NEXT: v_or_b32_e32 v1, v45, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v145, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v44, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v42, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v1 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v151 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v41, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v149 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v148 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v144 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v180, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v133 +; GFX11-NEXT: v_or_b32_e32 v0, v177, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v19, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v166, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v167, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v133, 0x300, v0 +; GFX11-NEXT: 
v_add_nc_u32_e32 v0, 3, v129 +; GFX11-NEXT: v_add_nc_u32_e32 v129, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v144, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v161, v3 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v119 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v117 +; GFX11-NEXT: s_waitcnt vmcnt(5) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v116 +; GFX11-NEXT: v_add_nc_u32_e32 v116, 0x300, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v147, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v114 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v99 +; GFX11-NEXT: v_add_nc_u32_e32 v114, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v132, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v130, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v103 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v98 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v54 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v39 +; GFX11-NEXT: v_add_nc_u32_e32 v33, 3, v33 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v4, v113, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v128, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v100 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v4, v101, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v102, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v96 +; GFX11-NEXT: v_or_b32_e32 v1, v134, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v4, v97, v6 +; GFX11-NEXT: 
v_add_nc_u32_e32 v5, 3, v55 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v52 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v87, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v51 +; GFX11-NEXT: v_or_b32_e32 v4, v86, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v85, v6 +; GFX11-NEXT: v_or_b32_e32 v6, v84, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v51, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v50, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v49 +; GFX11-NEXT: v_or_b32_e32 v5, v83, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v48 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v82, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v38 +; GFX11-NEXT: v_or_b32_e32 v5, v81, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v38, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v4, v71, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v80, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v37 +; GFX11-NEXT: v_add_nc_u32_e32 v37, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v39, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v70, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v36 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v34, 3, v35 +; GFX11-NEXT: v_add_nc_u32_e32 v35, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v7 +; GFX11-NEXT: v_or_b32_e32 v5, 
v69, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v34 +; GFX11-NEXT: v_or_b32_e32 v3, v112, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v68, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v34, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v67, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v66, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v33 +; GFX11-NEXT: v_add_nc_u32_e32 v33, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v32 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v65, v8 +; GFX11-NEXT: v_and_b32_e64 v8, 0xffff, s4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v36, 0xffff, v6 +; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v64, v4 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v14 +; GFX11-NEXT: v_lshl_or_b32 v10, v10, 16, v36 +; GFX11-NEXT: v_lshl_or_b32 v5, v7, 16, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v33 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v37 +; GFX11-NEXT: v_and_b32_e32 v36, 0xffff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_lshl_or_b32 v6, v32, 16, v7 +; GFX11-NEXT: v_lshl_or_b32 v4, v4, 16, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v35 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v51 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v38 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v19 +; GFX11-NEXT: v_lshl_or_b32 v7, v34, 16, v8 +; GFX11-NEXT: v_lshl_or_b32 v8, v39, 16, v33 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v12 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v11 +; GFX11-NEXT: v_lshl_or_b32 v11, v50, 16, v32 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v1 +; GFX11-NEXT: v_lshl_or_b32 v9, v9, 16, v35 +; GFX11-NEXT: v_lshl_or_b32 v12, v15, 16, v14 +; GFX11-NEXT: v_lshl_or_b32 v13, v13, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 
v14, v3, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v16, v16, 16, v32 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v116 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v129 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v18 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v17 +; GFX11-NEXT: v_lshl_or_b32 v26, v26, 16, v36 +; GFX11-NEXT: v_lshl_or_b32 v17, v114, 16, v32 +; GFX11-NEXT: v_lshl_or_b32 v18, v144, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v20, v20, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v21, v21, 16, v35 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v115 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v135 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v131 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v23 +; GFX11-NEXT: v_and_b32_e32 v36, 0xffff, v27 +; GFX11-NEXT: v_lshl_or_b32 v22, v145, 16, v32 +; GFX11-NEXT: v_lshl_or_b32 v23, v118, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v24, v24, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v25, v25, 16, v35 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v163 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v182 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v181 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v28 +; GFX11-NEXT: v_lshl_or_b32 v15, v2, 16, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_lshl_or_b32 v19, v133, 16, v19 +; GFX11-NEXT: v_lshl_or_b32 v27, v160, 16, v32 +; GFX11-NEXT: v_lshl_or_b32 v28, v178, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v29, v29, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v30, v30, 16, v35 +; GFX11-NEXT: v_lshl_or_b32 v31, v31, 16, v36 +; GFX11-NEXT: .LBB93_3: ; %end +; GFX11-NEXT: s_clause 0x1e +; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:320 +; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:324 +; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:328 +; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:332 +; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:336 +; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:340 +; GFX11-NEXT: scratch_load_b32 v88, off, s32 
offset:344 +; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:348 +; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:352 +; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:356 +; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:360 +; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:364 +; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:368 +; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:372 +; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:376 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:380 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:384 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:388 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:392 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:396 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:400 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:404 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:408 +; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:412 +; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:416 +; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:420 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:424 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:428 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:432 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:436 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:440 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB93_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-NEXT: s_branch .LBB93_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -196427,766 +191419,814 @@ define <64 x i16> @bitcast_v128i8_to_v64i16(<128 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v150, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v150, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v147, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v149, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v149, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v148, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v148, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v147, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v146, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v146, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v145, off, s32 offset:280 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_hi_b16 v37, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v134, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v144, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:260 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:520 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v89, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:416 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:392 ; 4-byte Folded Spill +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_u16 v31, off, s32 offset:384 +; GFX11-TRUE16-NEXT: scratch_load_u16 v114, off, s32 offset:380 +; GFX11-TRUE16-NEXT: scratch_load_u16 v103, off, s32 offset:376 +; GFX11-TRUE16-NEXT: scratch_load_u16 v128, off, s32 offset:372 +; GFX11-TRUE16-NEXT: scratch_load_u16 v56, off, s32 offset:368 +; GFX11-TRUE16-NEXT: scratch_load_u16 v117, off, s32 offset:364 +; GFX11-TRUE16-NEXT: scratch_load_u16 v102, off, s32 offset:360 +; GFX11-TRUE16-NEXT: scratch_load_u16 v133, off, s32 offset:356 +; GFX11-TRUE16-NEXT: scratch_load_u16 v57, off, s32 offset:352 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:348 +; GFX11-TRUE16-NEXT: scratch_load_u16 v58, off, s32 offset:344 +; GFX11-TRUE16-NEXT: scratch_load_u16 v132, off, s32 offset:340 +; GFX11-TRUE16-NEXT: scratch_load_u16 v101, off, s32 offset:336 +; GFX11-TRUE16-NEXT: scratch_load_u16 v118, off, s32 offset:332 +; GFX11-TRUE16-NEXT: scratch_load_u16 v59, off, s32 offset:328 +; GFX11-TRUE16-NEXT: scratch_load_u16 v134, off, s32 offset:324 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:320 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:316 +; GFX11-TRUE16-NEXT: scratch_load_u16 v100, off, s32 offset:312 +; GFX11-TRUE16-NEXT: scratch_load_u16 v145, off, s32 offset:308 +; GFX11-TRUE16-NEXT: scratch_load_u16 v99, off, s32 offset:304 +; GFX11-TRUE16-NEXT: scratch_load_u16 v119, off, s32 offset:300 +; GFX11-TRUE16-NEXT: 
scratch_load_u16 v60, off, s32 offset:296 +; GFX11-TRUE16-NEXT: scratch_load_u16 v135, off, s32 offset:292 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:288 +; GFX11-TRUE16-NEXT: scratch_load_u16 v129, off, s32 offset:284 +; GFX11-TRUE16-NEXT: scratch_load_u16 v61, off, s32 offset:280 +; GFX11-TRUE16-NEXT: scratch_load_u16 v146, off, s32 offset:276 +; GFX11-TRUE16-NEXT: scratch_load_u16 v62, off, s32 offset:272 +; GFX11-TRUE16-NEXT: scratch_load_u16 v115, off, s32 offset:268 +; GFX11-TRUE16-NEXT: scratch_load_u16 v63, off, s32 offset:264 +; GFX11-TRUE16-NEXT: scratch_load_u16 v144, off, s32 offset:260 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v144, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v135, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v132, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v135, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v134, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v133, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v160, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v101, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v103, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v113, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v113, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v114, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v115, off, s32 offset:48 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v116, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v116, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v117, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v117, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v118, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v119, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v119, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v128, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v128, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v129, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v130, off, s32 offset:152 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:256 +; GFX11-TRUE16-NEXT: scratch_load_u16 v130, off, s32 offset:252 +; GFX11-TRUE16-NEXT: scratch_load_u16 v72, off, s32 offset:248 +; GFX11-TRUE16-NEXT: scratch_load_u16 v147, off, s32 offset:244 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:240 +; GFX11-TRUE16-NEXT: scratch_load_u16 v116, off, s32 offset:236 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 offset:232 +; GFX11-TRUE16-NEXT: scratch_load_u16 v148, off, s32 offset:228 +; GFX11-TRUE16-NEXT: scratch_load_u16 v86, off, s32 offset:224 +; GFX11-TRUE16-NEXT: scratch_load_u16 v131, off, s32 offset:220 +; GFX11-TRUE16-NEXT: scratch_load_u16 v73, off, s32 offset:216 +; GFX11-TRUE16-NEXT: scratch_load_b32 v65, off, s32 offset:388 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:32 +; GFX11-TRUE16-NEXT: 
scratch_load_u16 v70, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_u16 v74, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v75, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v76, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v77, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v78, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v79, off, s32 offset:136 +; GFX11-TRUE16-NEXT: scratch_load_u16 v88, off, s32 offset:144 +; GFX11-TRUE16-NEXT: scratch_load_u16 v89, off, s32 offset:152 ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v130, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v131, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v132, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v133, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v151, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v151, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v100, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v85, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v97, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, 
s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v87, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v80, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v86, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v96, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v85, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v84, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v83, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v90, off, s32 offset:160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v91, off, s32 offset:168 +; GFX11-TRUE16-NEXT: scratch_load_u16 v92, off, s32 offset:176 +; GFX11-TRUE16-NEXT: scratch_load_u16 v93, off, s32 offset:184 +; GFX11-TRUE16-NEXT: scratch_load_u16 v94, off, s32 offset:192 +; GFX11-TRUE16-NEXT: scratch_load_u16 v95, off, s32 offset:200 +; GFX11-TRUE16-NEXT: scratch_load_u16 v104, off, s32 offset:208 +; GFX11-TRUE16-NEXT: scratch_load_u16 v179, off, s32 offset:212 +; GFX11-TRUE16-NEXT: scratch_load_u16 v163, off, s32 offset:204 +; GFX11-TRUE16-NEXT: scratch_load_u16 v183, off, s32 offset:196 +; GFX11-TRUE16-NEXT: scratch_load_u16 v150, off, s32 offset:188 +; GFX11-TRUE16-NEXT: scratch_load_u16 v167, off, s32 offset:180 +; GFX11-TRUE16-NEXT: 
scratch_load_u16 v161, off, s32 offset:172 +; GFX11-TRUE16-NEXT: scratch_load_u16 v177, off, s32 offset:164 +; GFX11-TRUE16-NEXT: scratch_load_u16 v149, off, s32 offset:156 +; GFX11-TRUE16-NEXT: scratch_load_u16 v180, off, s32 offset:148 +; GFX11-TRUE16-NEXT: scratch_load_u16 v151, off, s32 offset:140 +; GFX11-TRUE16-NEXT: scratch_load_u16 v164, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v41, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v47, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v165, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v43, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v181, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v45, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v162, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v46, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v176, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v42, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v178, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v44, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v160, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v182, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v54.l, v30.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v67.l, v28.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.h, v26.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v65.h, v24.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v65.l, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.h, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.h, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.l, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v10.l -; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v54.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.h, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v19.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.h, 8, v21.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v23.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v25.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v27.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.l, 8, v29.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.h, 8, v150.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v150.l, 8, v150.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.h, 8, v147.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v149.h, 8, v149.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v149.l, 8, v149.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.h, 8, v148.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v145.l, 8, v145.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v148.l, 8, v148.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v147.l, 8, v147.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v146.h, 8, v146.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v146.l, 8, v146.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v145.h, 8, v145.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v134.h, 8, v134.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.h, 8, v144.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v144.l, 8, v144.l -; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.h, 8, v135.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.l, 8, v132.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v135.l, 8, v135.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v134.l, 8, v134.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.h, 8, v133.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v160 +; GFX11-TRUE16-NEXT: scratch_load_u16 v166, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v40, off, s32 offset:4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v30.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v28.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v26.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v24.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.h, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v19.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v21.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v23.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v25.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v27.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.l, 8, v29.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v31.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.l, 8, v103.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.h, 8, v56.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v102.l, 8, v102.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.h, 8, v57.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.h, 8, v58.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v101.l, 8, v101.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v112.h, 8, v103.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.l, 8, v113.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v113.h, 8, v113.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v103.h, 8, v114.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v115.l, 8, v114.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v115.h, 8, v115.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.l, 8, v116.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v116.h, 8, v116.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.l, 8, v117.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.l, 8, v117.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.h, 8, v59.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.l, 8, v97.l +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v100.l, 8, v100.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v99.l, 8, v99.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.h, 8, v60.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v98.l, 8, v98.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v97.h, 8, v61.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v86.h, 8, v62.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.h, 8, v63.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v96.l, 8, v96.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.h, 8, v72.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.l, 8, v84.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v87.l, 8, v87.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v86.l, 8, v86.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.l, 8, v118.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.h, 8, v73.l +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v65 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v53.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v64.h, 8, v66.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.l, 8, v67.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v65.h, 8, v68.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v69.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.l, 8, v70.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v67.h, 8, v71.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.l, 8, v80.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v68.h, 8, v81.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.l, 8, v82.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.l, 8, v83.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.l, 8, v85.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v118.h, 8, v118.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v70.h, 8, v74.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v119.l, 8, v119.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.l, 8, v75.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v114.h, 8, v119.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v66.h, 8, v76.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(38) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v119.h, 8, v128.l +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v71.h, 8, v77.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.l, 8, v128.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.l, 8, v78.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(36) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v128.h, 8, v129.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v80.h, 8, v79.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.l, 8, v129.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.l, 8, v88.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(34) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v130.l, 8, v130.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.l, 8, v89.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v117.h, 8, v130.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v69.h, 8, v90.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v130.h, 8, v131.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v82.h, 8, v91.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.l, 8, v131.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.l, 8, v92.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(30) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v131.h, 8, v132.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v83.h, 8, v93.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v129.h, 8, v133.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v81.h, 8, v94.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(28) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v132.h, 8, v151.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v84.h, 8, v95.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v133.l, 8, v151.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.l, 8, v31.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v151.h, 8, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v85.l, 8, v104.l ; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX11-TRUE16-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB96_3 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow -; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB96_4 -; GFX11-TRUE16-NEXT: .LBB96_2: ; %end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB96_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v52.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v51.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v53.l -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v51.l -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v54.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v55.l -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v53.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v52.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v65.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v64.l -; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v65.h -; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v67.l -; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v54.l +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB96_2 +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v34.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v33.l +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v34.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v36.l +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v35.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v37.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v38.h +; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v37.h +; 
GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v39.l +; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v38.l +; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v35.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v67.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v66.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v68.l -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v66.l -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v70.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v68.h -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v71.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v69.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v83.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v69.l -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v84.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v82.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v85.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v80.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v96.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v86.h -; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v84.l -; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v80.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v87.h -; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v71.h -; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v96.l -; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v86.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v97.l -; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v85.h -; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v100.l -; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v98.l -; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v100.h -; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: 
v_and_b16 v25.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v48.l -; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v50.l -; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v50.h -; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v39.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v81.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v81.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v82.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v1.h, v83.l -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v2.l, v97.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v70.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v3.l, v98.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v99.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v99.h -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v87.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v101.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v102.l -; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v102.h -; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v103.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v112.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v101.l -; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v112.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v113.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v113.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v103.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v115.l -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v115.h -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v116.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v116.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v117.l -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v114.l -; 
GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v118.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v118.h -; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v119.l -; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v114.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v119.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v128.l -; GFX11-TRUE16-NEXT: v_or_b16 v16.l, v16.l, v128.h -; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v129.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v17.l, v130.l -; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v117.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.l, v18.l, v130.h -; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v131.l -; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v19.l, v131.h -; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v129.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.l, v20.l, v132.h -; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v133.l -; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v21.l, v133.h -; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v134.l -; GFX11-TRUE16-NEXT: v_or_b16 v22.l, v22.l, v135.l -; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v132.l -; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v23.l, v135.h -; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v144.l -; GFX11-TRUE16-NEXT: v_or_b16 v24.l, v24.l, v144.h -; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v134.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v25.l, v145.h -; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v146.l -; GFX11-TRUE16-NEXT: v_or_b16 v26.l, v26.l, v146.h -; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v147.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v27.l, v148.l -; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v145.l -; GFX11-TRUE16-NEXT: v_or_b16 v28.l, v28.l, v148.h -; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v149.l -; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v29.l, v149.h -; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v147.h -; GFX11-TRUE16-NEXT: v_or_b16 v30.l, v30.l, v150.l -; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v150.h -; GFX11-TRUE16-NEXT: v_or_b16 v31.l, v31.l, v151.l -; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v151.h -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v40.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v166.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v182.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v160.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v44.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v178.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v42.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v176.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v46.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v162.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v45.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v181.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v43.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v165.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v47.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v41.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.l, 0xff, v164.l +; GFX11-TRUE16-NEXT: v_and_b16 v16.h, 0xff, v151.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.l, 0xff, v180.l +; GFX11-TRUE16-NEXT: v_and_b16 v17.h, 0xff, v149.l +; GFX11-TRUE16-NEXT: v_and_b16 v18.l, 0xff, v177.l +; GFX11-TRUE16-NEXT: v_and_b16 v18.h, 0xff, v161.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.l, 0xff, v167.l +; GFX11-TRUE16-NEXT: v_and_b16 v19.h, 0xff, v150.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.l, 0xff, v183.l +; GFX11-TRUE16-NEXT: v_and_b16 v20.h, 0xff, v163.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.l, 0xff, v179.l +; GFX11-TRUE16-NEXT: v_and_b16 v21.h, 0xff, v131.l +; GFX11-TRUE16-NEXT: v_and_b16 v22.l, 0xff, v148.l +; GFX11-TRUE16-NEXT: v_and_b16 v22.h, 0xff, v116.l +; GFX11-TRUE16-NEXT: v_and_b16 v23.l, 0xff, v147.l +; GFX11-TRUE16-NEXT: v_and_b16 v23.h, 0xff, v130.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.l, 0xff, v144.l +; GFX11-TRUE16-NEXT: v_and_b16 v24.h, 0xff, v115.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.l, 0xff, v146.l +; GFX11-TRUE16-NEXT: v_and_b16 v25.h, 0xff, v129.l +; 
GFX11-TRUE16-NEXT: v_and_b16 v26.l, 0xff, v135.l +; GFX11-TRUE16-NEXT: v_and_b16 v26.h, 0xff, v119.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.l, 0xff, v145.l +; GFX11-TRUE16-NEXT: v_and_b16 v27.h, 0xff, v113.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.l, 0xff, v134.l +; GFX11-TRUE16-NEXT: v_and_b16 v28.h, 0xff, v118.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.l, 0xff, v132.l +; GFX11-TRUE16-NEXT: v_and_b16 v29.h, 0xff, v112.l +; GFX11-TRUE16-NEXT: v_and_b16 v30.l, 0xff, v133.l +; GFX11-TRUE16-NEXT: v_and_b16 v30.h, 0xff, v117.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.l, 0xff, v128.l +; GFX11-TRUE16-NEXT: v_and_b16 v31.h, 0xff, v114.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v48.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v49.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v49.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v1.h, v50.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v2.l, v51.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v48.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v3.l, v51.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v52.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v52.h +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v50.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v53.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v54.l +; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v54.h +; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v55.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v64.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v53.l +; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v65.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v65.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v55.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v67.l +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v67.h +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v68.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v68.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v69.l +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v66.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v70.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, 
v13.h, v70.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v71.l +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v66.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v71.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v80.l +; GFX11-TRUE16-NEXT: v_or_b16 v16.l, v16.l, v80.h +; GFX11-TRUE16-NEXT: v_or_b16 v16.h, v16.h, v81.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v17.l, v82.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.h, v17.h, v69.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.l, v18.l, v82.h +; GFX11-TRUE16-NEXT: v_or_b16 v18.h, v18.h, v83.l +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v19.l, v83.h +; GFX11-TRUE16-NEXT: v_or_b16 v19.h, v19.h, v81.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.l, v20.l, v84.h +; GFX11-TRUE16-NEXT: v_or_b16 v20.h, v20.h, v85.l +; GFX11-TRUE16-NEXT: v_or_b16 v21.l, v21.l, v85.h +; GFX11-TRUE16-NEXT: v_or_b16 v21.h, v21.h, v86.l +; GFX11-TRUE16-NEXT: v_or_b16 v22.l, v22.l, v87.l +; GFX11-TRUE16-NEXT: v_or_b16 v22.h, v22.h, v84.l +; GFX11-TRUE16-NEXT: v_or_b16 v23.l, v23.l, v87.h +; GFX11-TRUE16-NEXT: v_or_b16 v23.h, v23.h, v96.l +; GFX11-TRUE16-NEXT: v_or_b16 v24.l, v24.l, v96.h +; GFX11-TRUE16-NEXT: v_or_b16 v24.h, v24.h, v86.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.l, v25.l, v97.h +; GFX11-TRUE16-NEXT: v_or_b16 v25.h, v25.h, v98.l +; GFX11-TRUE16-NEXT: v_or_b16 v26.l, v26.l, v98.h +; GFX11-TRUE16-NEXT: v_or_b16 v26.h, v26.h, v99.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v27.l, v100.l +; GFX11-TRUE16-NEXT: v_or_b16 v27.h, v27.h, v97.l +; GFX11-TRUE16-NEXT: v_or_b16 v28.l, v28.l, v100.h +; GFX11-TRUE16-NEXT: v_or_b16 v28.h, v28.h, v101.l +; GFX11-TRUE16-NEXT: v_or_b16 v29.l, v29.l, v101.h +; GFX11-TRUE16-NEXT: v_or_b16 v29.h, v29.h, v99.h +; GFX11-TRUE16-NEXT: v_or_b16 v30.l, v30.l, v102.l +; GFX11-TRUE16-NEXT: v_or_b16 v30.h, v30.h, v102.h +; GFX11-TRUE16-NEXT: v_or_b16 v31.l, v31.l, v103.l +; GFX11-TRUE16-NEXT: v_or_b16 v31.h, v31.h, v103.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr179 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr163 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr183 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr167 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr161 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr177 +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr180 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr164 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr41 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr47 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr165 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr43 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr181 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr162 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr46 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr176 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr42 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr178 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr44 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr160 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr182 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr40 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr80_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr84_lo16 -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr80_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr85_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr82_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr83_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr96_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr86_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_lo16 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr98_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr98_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr87_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr97_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr99_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr102_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr101_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr112_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr113_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr103_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr116_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr118_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr114_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr119_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr128_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr117_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr131_hi16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr129_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr132_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr144_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr134_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr146_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr145_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr148_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr149_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr147_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr150_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr151_hi16 +; GFX11-TRUE16-NEXT: .LBB96_2: ; %Flow ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB96_2 -; GFX11-TRUE16-NEXT: .LBB96_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v50.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v39.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v50.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v39.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v48.l, 3 +; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB96_4 +; GFX11-TRUE16-NEXT: ; %bb.3: ; %cmp.true +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v128.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v114.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v133.l, 3 +; GFX11-TRUE16-NEXT: 
v_add_nc_u16 v1.h, v117.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v145.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v151.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v151.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v150.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v150.h, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v148.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v103.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v103.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v102.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v102.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v100.l, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v31.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v30.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v49.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v38.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v132.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v112.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v134.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v118.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.l, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v36.l, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v149.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v147.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v148.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v149.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v129.l, 3 +; 
GFX11-TRUE16-NEXT: v_or_b16 v0.l, v101.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v99.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v100.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v101.l, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v29.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v28.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v34.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v38.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v37.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v146.l, v2.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v113.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v135.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v119.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v146.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v98.l, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.h, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v145.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v146.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v147.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v145.h, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v34.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v97.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v98.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v99.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v97.h, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v148.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v27.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v26.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.l, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 
v0.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v35.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v144.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v115.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v147.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v130.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v135.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v144.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v134.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v135.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v144.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v87.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v96.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v86.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v87.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v96.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.l, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v24.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v23.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v116.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(26) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v100.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v179.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v131.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(24) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v100.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v98.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v183.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v163.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, 
v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v132.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v133.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v134.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v132.h, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v133.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v84.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v85.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v86.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v84.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v85.l, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v22.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v21.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(22) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v97.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v85.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v167.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v150.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v96.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v86.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v177.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v161.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v20.h, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v87.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v131.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v129.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v130.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v131.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v180.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v83.h, v0.l +; GFX11-TRUE16-NEXT: 
v_or_b16 v0.h, v81.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v82.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v83.l, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v19.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v71.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v149.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v84.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v80.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v164.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v151.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v96.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v130.l, v2.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v47.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v82.l, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v117.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v128.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v129.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v119.h, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v86.h, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v69.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v80.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v81.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v71.h, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v41.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v16.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_add_nc_u16 
v0.l, v85.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v80.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v43.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v165.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v84.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v82.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v45.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v181.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v128.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v119.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v114.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v118.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v118.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v80.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v71.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v66.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v70.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v70.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v2.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v83.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v46.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v69.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v162.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v71.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v69.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v42.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v176.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v70.h, 3 +; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v44.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v117.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v114.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v116.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v116.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v115.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v69.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v66.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v68.l, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v68.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v67.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v68.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v178.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v68.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v66.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v182.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v160.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v67.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v66.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v40.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v166.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v115.h, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v113.h, v0.l -; GFX11-TRUE16-NEXT: 
v_or_b16 v0.h, v103.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v112.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v113.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v67.h, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v65.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v55.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v64.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v65.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v39.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v54.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v65.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v64.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v35.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v39.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v38.h, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v112.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v101.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v102.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v103.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v101.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v64.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v53.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v54.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v55.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v64.l, 3 +; GFX11-TRUE16-NEXT: 
v_add_nc_u16 v2.l, v37.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.l, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v52.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v55.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v53.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v35.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v102.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v99.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v87.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v98.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v99.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v54.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v52.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v50.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v51.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v52.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v54.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v36.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v51.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v52.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v51.h, 3 +; GFX11-TRUE16-NEXT: 
v_add_nc_u16 v0.l, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v34.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v34.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v97.h, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v70.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v82.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v83.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v81.l, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v81.h, v2.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v51.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v48.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v49.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v32.l, v50.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v48.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v33.l, v49.l, v2.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, 0x300, v32.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x300, v32.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, 0x300, v33.l +; GFX11-TRUE16-NEXT: .LBB96_4: ; %end ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: s_clause 0x1f +; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:392 +; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:396 +; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:400 +; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:404 +; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:408 +; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:412 +; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:416 
+; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:420 +; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:424 +; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:428 +; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:432 +; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:436 +; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:440 +; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:444 +; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:448 +; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:452 +; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:456 +; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:460 +; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:464 +; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:468 +; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:472 +; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:476 +; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:480 +; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:484 +; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:488 +; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:492 +; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:496 +; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:500 +; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:504 +; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:508 +; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:512 +; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:516 +; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:520 ; 4-byte Folded Reload +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v64i16: @@ -201794,1657 +196834,831 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; 
-; GFX11-TRUE16-LABEL: bitcast_v128i8_to_v64i16_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1e -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 
-; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v22 :: v_dual_mov_b32 v48, v20 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, v14 :: v_dual_mov_b32 v34, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, v12 :: v_dual_mov_b32 v38, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v35, v2 :: v_dual_mov_b32 v32, v0 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v2, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v16, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v18, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v20, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v22, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v24, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v26, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v28, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v30, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v41, off, s32 
offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v44, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v45, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v56, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v59, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v60, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v61, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v62, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v63, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v72, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v73, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v74, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v75, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v77, off, s32 offset:240 -; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v78, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v79, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v88, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v89, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v90, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v91, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v92, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v93, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v94, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v57, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v58, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v40, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v43, 
off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v182, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v183, off, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v179, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v181, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v164, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v176, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v160, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v163, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v146, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v150, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v135, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v145, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v119, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v131, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v115, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v178, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v162, off, s32 offset:132 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v165, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v149, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v151, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v144, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v148, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v129, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v133, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v117, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v118, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v114, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v116, off, s32 offset:44 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v100, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v103, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v98, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v99, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v96, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v68, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v67, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v70, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v69, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v80, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v82, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v86, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v97, 8, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 8, v4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v101, 8, v6 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v102, 8, v8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v113, 8, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v112, 8, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v130, 8, v14 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v128, 8, v16 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v134, 8, v18 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v132, 8, v20 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v161, 8, v22 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v147, 8, v24 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v166, 8, v26 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v167, 8, v28 -; GFX11-TRUE16-NEXT: 
v_lshlrev_b32_e32 v180, 8, v30 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v177, 8, v31 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v42, 8, v41 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v41, 8, v44 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v45, 8, v45 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v44, 8, v56 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v59, 8, v59 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v56, 8, v60 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v60, 8, v61 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v61, 8, v62 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v63, 8, v63 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v62, 8, v72 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v73, 8, v73 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v72, 8, v74 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v75, 8, v75 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v74, 8, v76 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v77, 8, v77 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v76, 8, v78 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v78, 8, v79 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(45) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v79, 8, v88 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v89, 8, v89 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v88, 8, v90 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-TRUE16-NEXT: 
v_lshlrev_b32_e32 v91, 8, v91 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v90, 8, v92 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v92, 8, v93 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v93, 8, v94 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB97_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e64 v5, 0xffff, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 
0xff, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v68 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v4, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v65 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v0, 16, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v39 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v6, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v49 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v50 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v48 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v7, v80 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v8, v81 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v53 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v9, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v55 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v51 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v10, 16, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v96 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v85 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v10, v97 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v11, v87 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v99 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v3, 16, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v103 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v114 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v98 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v0, 16, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v113 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v101 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v116 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v14, v128 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v112 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v117 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v102 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v130 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v133 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v20, v14, v132 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v0, 16, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v148 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v118 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xff, v129 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v16, v161 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v2, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v166 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v144 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v134 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v18, v18, v147 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 
v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v167 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v17, 16, v19 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v17, v18, 16, v22 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s7 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v18, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v151 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v149 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v16, v20, 16, v21 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, s8 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v180 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v19, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v165 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v162 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v42 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v41 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v20, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v115 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v45 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v44 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v21, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v119 -; GFX11-TRUE16-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v56 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v22, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v135 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v60 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v61 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v23, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v150 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v63 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v62 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v24, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v163 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v160 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v73 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v72 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v25, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v176 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v164 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v75 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v74 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v26, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v179 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v77 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v76 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v27, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v28, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v43 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v40 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v88 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v29, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v46 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, 
v91 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v90 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v30, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v58 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v57 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v92 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v93 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v31, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB97_3 -; GFX11-TRUE16-NEXT: .LBB97_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, 
s18, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v58 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v57 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v47 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v92, v0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v46 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v93, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v43 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v40 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v28, 
0x300, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v90, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v183 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v89, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v181 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v88, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v181, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v78, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v79, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v29, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v179 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v182, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v179, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v77, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v176 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v164 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v163 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v163, 0x300, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v160 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v75, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v160, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v74, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v73, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v1 -; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v150 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v72, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v146 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v145 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v135 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v63, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v131 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v62, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v131, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v60, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v61, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v24, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v119 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v135, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v119, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v59, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v178 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v115 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v165 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v115, 0x300, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v56, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v162 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v45, v1 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v145, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v44, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v42, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v151 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v41, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v149 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v148 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v144 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v180, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v133 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v177, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v166, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v167, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v133, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v129 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v129, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v144, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v161, v3 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v118 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v117 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v116 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v116, 0x300, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 
v0, v147, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v114 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v99 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v114, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v132, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v130, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v103 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v98 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v54 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v39 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 3, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v113, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v128, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v100 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v101, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v102, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v96 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v134, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v97, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v55 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v87, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v51 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v86, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v85, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v84, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v51, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v50, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v49 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v83, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v82, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v38 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v81, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v38, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v71, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v80, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v37 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v39, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v70, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 3, v35 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v7 
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v69, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v34 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v112, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v68, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v67, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v66, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v32 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v65, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v36, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v10, 16, v36 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff, v33 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v36, 0xffff, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v32, 16, v7 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v51 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v38 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v34, 16, v8 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v39, 16, v33 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v12 
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v50, 16, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v9, 16, v35 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v15, 16, v14 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v13, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v3, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v16, v16, 16, v32 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v116 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v129 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v26, v26, 16, v36 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v17, v114, 16, v32 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v18, v144, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v20, v20, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v21, v21, 16, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v115 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v135 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v131 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v36, 0xffff, v27 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v22, v145, 16, v32 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v23, v119, 16, v33 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v24, v24, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v25, v25, 16, v35 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff, v163 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v182 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v181 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v28 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v2, 16, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v19, v133, 16, v19 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v27, v160, 16, v32 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v28, v179, 16, v33 -; GFX11-TRUE16-NEXT: 
v_lshl_or_b32 v29, v29, 16, v34 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v30, v30, 16, v35 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v31, v31, 16, v36 -; GFX11-TRUE16-NEXT: .LBB97_3: ; %end -; GFX11-TRUE16-NEXT: s_clause 0x1e -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:320 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:324 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:328 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:332 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:336 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:340 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:344 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:348 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:352 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:356 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:360 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:364 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:368 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:372 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:376 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:380 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:384 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:388 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:392 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:396 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:400 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:404 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:408 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:412 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:416 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:420 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:424 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 
offset:428 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:432 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:436 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:440 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB97_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: s_branch .LBB97_2 -; -; GFX11-FAKE16-LABEL: bitcast_v128i8_to_v64i16_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x1e -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 
offset:372 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v53, v22 :: v_dual_mov_b32 v48, v20 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v50, v18 :: v_dual_mov_b32 v39, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v49, v14 :: v_dual_mov_b32 v34, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v37, v12 :: v_dual_mov_b32 v38, v10 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v35, v2 :: v_dual_mov_b32 v32, v0 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:316 -; GFX11-FAKE16-NEXT: scratch_load_u16 v2, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 
v16, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v18, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_u16 v20, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_u16 v22, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_u16 v24, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_u16 v26, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_u16 v28, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_u16 v30, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_u16 v31, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_u16 v41, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_u16 v44, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_u16 v45, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_u16 v56, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_u16 v59, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_u16 v60, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_u16 v61, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_u16 v62, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_u16 v63, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_u16 v72, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_u16 v73, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_u16 v74, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_u16 v75, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_u16 v76, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_u16 v77, off, s32 offset:240 -; GFX11-FAKE16-NEXT: s_clause 0x1f -; GFX11-FAKE16-NEXT: scratch_load_u16 v78, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_u16 v79, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_u16 v88, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_u16 v89, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_u16 v90, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_u16 v91, off, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_load_u16 v92, off, s32 offset:296 -; GFX11-FAKE16-NEXT: 
scratch_load_u16 v93, off, s32 offset:304 -; GFX11-FAKE16-NEXT: scratch_load_u16 v94, off, s32 offset:312 -; GFX11-FAKE16-NEXT: scratch_load_u16 v57, off, s32 offset:308 -; GFX11-FAKE16-NEXT: scratch_load_u16 v58, off, s32 offset:300 -; GFX11-FAKE16-NEXT: scratch_load_u16 v46, off, s32 offset:292 -; GFX11-FAKE16-NEXT: scratch_load_u16 v47, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_u16 v40, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_u16 v43, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_u16 v182, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_u16 v183, off, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_load_u16 v178, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_u16 v181, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_u16 v164, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_u16 v176, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_u16 v160, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_u16 v163, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_u16 v146, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_u16 v150, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_u16 v135, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_u16 v145, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_u16 v118, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_u16 v131, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_u16 v115, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_u16 v179, off, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_load_u16 v162, off, s32 offset:132 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v165, off, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_load_u16 v149, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_u16 v151, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_u16 v144, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_u16 v148, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_u16 v129, 
off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_u16 v133, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_u16 v117, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_u16 v119, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v114, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_u16 v116, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v100, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v103, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v98, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v99, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v96, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v64, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v65, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v68, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v69, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v80, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v81, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(62) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 8, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 8, v4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v101, 8, v6 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v102, 8, v8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v113, 8, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v112, 8, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v130, 8, v14 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v128, 8, v16 -; 
GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 8, v18 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 8, v20 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v161, 8, v22 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v147, 8, v24 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v166, 8, v26 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v167, 8, v28 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v180, 8, v30 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v177, 8, v31 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v42, 8, v41 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(61) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v41, 8, v44 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(60) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v45, 8, v45 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(59) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v44, 8, v56 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(58) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v59, 8, v59 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(57) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v56, 8, v60 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(56) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v60, 8, v61 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(55) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v61, 8, v62 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(54) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v63, 8, v63 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(53) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v62, 8, v72 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(52) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v73, 8, v73 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(51) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v72, 8, v74 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(50) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v75, 8, v75 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(49) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v74, 8, v76 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(48) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v77, 8, v77 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(47) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v76, 8, v78 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(46) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v78, 8, v79 -; GFX11-FAKE16-NEXT: 
s_waitcnt vmcnt(45) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v79, 8, v88 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(44) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v89, 8, v89 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(43) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v88, 8, v90 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(42) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v91, 8, v91 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(41) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v90, 8, v92 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(40) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v92, 8, v93 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(39) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v93, 8, v94 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB97_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e64 v5, 0xffff, s5 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s26, 0xff -; 
GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s8, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v68 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v4, v67 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v6, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v49 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v50 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v48 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v7, v80 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v8, v81 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v53 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v9, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v55 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v51 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 
v9, v10, 16, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v96 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v85 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v10, v97 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v11, v87 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v99 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v3, 16, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v103 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v114 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v98 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v0, 16, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v100 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v113 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v101 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v116 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, v14, v128 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v112 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v117 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v102 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v130 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v133 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, v14, v132 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v0, 16, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v148 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v119 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xff, v129 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v16, v161 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v13 -; GFX11-FAKE16-NEXT: 
v_lshl_or_b32 v13, v2, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v166 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v144 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v134 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v18, v18, v147 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v167 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v17, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v18, 16, v22 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, s7 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v151 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v149 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v20, 16, v21 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, s8 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v177 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v165 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v162 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v42 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v41 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v179 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v115 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v45 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v44 -; 
GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v118 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v59 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v56 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v145 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v135 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v60 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v61 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v150 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v146 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v63 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v62 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v163 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v160 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v73 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v72 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v176 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v164 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v75 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v74 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v178 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v77 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v76 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v183 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v182 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v78 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v79 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v43 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v40 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v89 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v88 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v46 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v91 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v90 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v58 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v57 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v92 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v93 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB97_3 -; GFX11-FAKE16-NEXT: .LBB97_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: 
s_or_b32 s7, s8, s7 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(37) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v58 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v57 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(35) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v47 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v92, v0 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v46 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v93, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v91, v2 -; GFX11-FAKE16-NEXT: 
s_waitcnt vmcnt(33) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v43 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x300, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v40 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x300, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v90, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v183 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v89, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x300, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v181 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v88, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v181, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v78, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v79, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v29, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v178 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v182, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v178, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v77, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v176 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v164 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v163 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v163, 0x300, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v76, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, 
v160 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v75, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v160, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v74, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v73, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v150 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v72, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v146 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v145 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v135 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, 0x300, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v63, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v131 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v62, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v131, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v60, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v61, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v118 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v135, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v118, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v59, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v179 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v115 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v165 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v115, 0x300, v1 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v56, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v162 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v45, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v145, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v44, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v42, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v151 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v41, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v149 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v148 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v144 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v180, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v133 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v177, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v166, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v167, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v133, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v129 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v129, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v144, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v161, v3 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v119 -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v117 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v116 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v116, 0x300, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v147, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v114 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v99 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v114, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v132, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v130, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v103 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v98 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v54 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v39 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 3, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v113, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v128, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v100 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v101, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v102, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v96 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v134, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v97, 
v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v55 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v87, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v51 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v86, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v85, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v84, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v51, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v50, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v49 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v83, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v82, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v81, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v71, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v80, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v70, 
v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 3, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v69, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v112, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v68, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v67, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v66, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v65, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v10, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v32, 16, v7 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v51 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v34, 16, v8 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v39, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v50, 16, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v9, 16, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v15, 16, v14 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v13, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v3, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v16, 16, v32 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v116 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v129 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v26, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v114, 16, v32 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v144, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v20, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v21, 16, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v115 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v135 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v131 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v145, 16, v32 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v118, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v24, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v25, 16, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v163 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v181 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v28 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v2, 16, v0 -; GFX11-FAKE16-NEXT: 
v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v133, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v160, 16, v32 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v178, 16, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v29, 16, v34 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v30, 16, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v31, 16, v36 -; GFX11-FAKE16-NEXT: .LBB97_3: ; %end -; GFX11-FAKE16-NEXT: s_clause 0x1e -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:320 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:324 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:328 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:332 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:336 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:340 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:344 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:348 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:352 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:356 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:360 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:364 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:368 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:372 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:376 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:380 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:384 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:388 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:392 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:396 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:400 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:404 -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:408 -; 
GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:412 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:416 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:420 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:424 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:428 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:432 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:436 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:440 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB97_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: s_branch .LBB97_2 +; GFX11-LABEL: bitcast_v128i8_to_v64i16_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x1e +; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-NEXT: 
scratch_store_b32 off, v63, s32 offset:380 +; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:320 +; GFX11-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 +; GFX11-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 +; GFX11-NEXT: v_dual_mov_b32 v53, v22 :: v_dual_mov_b32 v48, v20 +; GFX11-NEXT: v_dual_mov_b32 v50, v18 :: v_dual_mov_b32 v39, v16 +; GFX11-NEXT: v_dual_mov_b32 v49, v14 :: v_dual_mov_b32 v34, v8 +; GFX11-NEXT: v_dual_mov_b32 v37, v12 :: v_dual_mov_b32 v38, v10 +; GFX11-NEXT: v_dual_mov_b32 v36, v6 :: v_dual_mov_b32 v33, v4 +; GFX11-NEXT: v_dual_mov_b32 v35, v2 :: v_dual_mov_b32 v32, v0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:316 +; GFX11-NEXT: scratch_load_u16 v2, off, s32 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v16, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v18, off, s32 
offset:64 +; GFX11-NEXT: scratch_load_u16 v20, off, s32 offset:72 +; GFX11-NEXT: scratch_load_u16 v22, off, s32 offset:80 +; GFX11-NEXT: scratch_load_u16 v24, off, s32 offset:88 +; GFX11-NEXT: scratch_load_u16 v26, off, s32 offset:96 +; GFX11-NEXT: scratch_load_u16 v28, off, s32 offset:104 +; GFX11-NEXT: scratch_load_u16 v30, off, s32 offset:112 +; GFX11-NEXT: scratch_load_u16 v31, off, s32 offset:120 +; GFX11-NEXT: scratch_load_u16 v41, off, s32 offset:128 +; GFX11-NEXT: scratch_load_u16 v44, off, s32 offset:136 +; GFX11-NEXT: scratch_load_u16 v45, off, s32 offset:144 +; GFX11-NEXT: scratch_load_u16 v56, off, s32 offset:152 +; GFX11-NEXT: scratch_load_u16 v59, off, s32 offset:160 +; GFX11-NEXT: scratch_load_u16 v60, off, s32 offset:168 +; GFX11-NEXT: scratch_load_u16 v61, off, s32 offset:176 +; GFX11-NEXT: scratch_load_u16 v62, off, s32 offset:184 +; GFX11-NEXT: scratch_load_u16 v63, off, s32 offset:192 +; GFX11-NEXT: scratch_load_u16 v72, off, s32 offset:200 +; GFX11-NEXT: scratch_load_u16 v73, off, s32 offset:208 +; GFX11-NEXT: scratch_load_u16 v74, off, s32 offset:216 +; GFX11-NEXT: scratch_load_u16 v75, off, s32 offset:224 +; GFX11-NEXT: scratch_load_u16 v76, off, s32 offset:232 +; GFX11-NEXT: scratch_load_u16 v77, off, s32 offset:240 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_load_u16 v78, off, s32 offset:248 +; GFX11-NEXT: scratch_load_u16 v79, off, s32 offset:256 +; GFX11-NEXT: scratch_load_u16 v88, off, s32 offset:264 +; GFX11-NEXT: scratch_load_u16 v89, off, s32 offset:272 +; GFX11-NEXT: scratch_load_u16 v90, off, s32 offset:280 +; GFX11-NEXT: scratch_load_u16 v91, off, s32 offset:288 +; GFX11-NEXT: scratch_load_u16 v92, off, s32 offset:296 +; GFX11-NEXT: scratch_load_u16 v93, off, s32 offset:304 +; GFX11-NEXT: scratch_load_u16 v94, off, s32 offset:312 +; GFX11-NEXT: scratch_load_u16 v57, off, s32 offset:308 +; GFX11-NEXT: scratch_load_u16 v58, off, s32 offset:300 +; GFX11-NEXT: scratch_load_u16 v46, off, s32 offset:292 +; GFX11-NEXT: 
scratch_load_u16 v47, off, s32 offset:284 +; GFX11-NEXT: scratch_load_u16 v40, off, s32 offset:276 +; GFX11-NEXT: scratch_load_u16 v43, off, s32 offset:268 +; GFX11-NEXT: scratch_load_u16 v182, off, s32 offset:260 +; GFX11-NEXT: scratch_load_u16 v183, off, s32 offset:252 +; GFX11-NEXT: scratch_load_u16 v178, off, s32 offset:244 +; GFX11-NEXT: scratch_load_u16 v181, off, s32 offset:236 +; GFX11-NEXT: scratch_load_u16 v164, off, s32 offset:228 +; GFX11-NEXT: scratch_load_u16 v176, off, s32 offset:220 +; GFX11-NEXT: scratch_load_u16 v160, off, s32 offset:212 +; GFX11-NEXT: scratch_load_u16 v163, off, s32 offset:204 +; GFX11-NEXT: scratch_load_u16 v146, off, s32 offset:196 +; GFX11-NEXT: scratch_load_u16 v150, off, s32 offset:188 +; GFX11-NEXT: scratch_load_u16 v135, off, s32 offset:180 +; GFX11-NEXT: scratch_load_u16 v145, off, s32 offset:172 +; GFX11-NEXT: scratch_load_u16 v118, off, s32 offset:164 +; GFX11-NEXT: scratch_load_u16 v131, off, s32 offset:156 +; GFX11-NEXT: scratch_load_u16 v115, off, s32 offset:148 +; GFX11-NEXT: scratch_load_u16 v179, off, s32 offset:140 +; GFX11-NEXT: scratch_load_u16 v162, off, s32 offset:132 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v165, off, s32 offset:124 +; GFX11-NEXT: scratch_load_u16 v149, off, s32 offset:116 +; GFX11-NEXT: scratch_load_u16 v151, off, s32 offset:108 +; GFX11-NEXT: scratch_load_u16 v144, off, s32 offset:100 +; GFX11-NEXT: scratch_load_u16 v148, off, s32 offset:92 +; GFX11-NEXT: scratch_load_u16 v129, off, s32 offset:84 +; GFX11-NEXT: scratch_load_u16 v133, off, s32 offset:76 +; GFX11-NEXT: scratch_load_u16 v117, off, s32 offset:68 +; GFX11-NEXT: scratch_load_u16 v119, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v114, off, s32 offset:52 +; GFX11-NEXT: scratch_load_u16 v116, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v100, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v103, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v98, off, s32 offset:20 +; GFX11-NEXT: 
scratch_load_u16 v99, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v96, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v64, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v65, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v68, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v67, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v69, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v80, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v82, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v86, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(62) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v97, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 8, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v101, 8, v6 +; GFX11-NEXT: v_lshlrev_b32_e32 v102, 8, v8 +; GFX11-NEXT: v_lshlrev_b32_e32 v113, 8, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v112, 8, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v130, 8, v14 +; GFX11-NEXT: v_lshlrev_b32_e32 v128, 8, v16 +; GFX11-NEXT: v_lshlrev_b32_e32 v134, 8, v18 +; GFX11-NEXT: v_lshlrev_b32_e32 v132, 8, v20 +; GFX11-NEXT: v_lshlrev_b32_e32 v161, 8, v22 +; GFX11-NEXT: v_lshlrev_b32_e32 v147, 8, v24 +; GFX11-NEXT: v_lshlrev_b32_e32 v166, 8, v26 +; GFX11-NEXT: v_lshlrev_b32_e32 v167, 8, v28 +; GFX11-NEXT: v_lshlrev_b32_e32 v180, 8, v30 +; GFX11-NEXT: v_lshlrev_b32_e32 v177, 8, v31 +; GFX11-NEXT: v_lshlrev_b32_e32 v42, 8, v41 +; GFX11-NEXT: s_waitcnt vmcnt(61) +; GFX11-NEXT: v_lshlrev_b32_e32 v41, 8, v44 +; GFX11-NEXT: s_waitcnt vmcnt(60) +; GFX11-NEXT: v_lshlrev_b32_e32 v45, 8, v45 +; GFX11-NEXT: s_waitcnt vmcnt(59) +; GFX11-NEXT: v_lshlrev_b32_e32 v44, 8, v56 +; GFX11-NEXT: s_waitcnt vmcnt(58) +; GFX11-NEXT: v_lshlrev_b32_e32 v59, 8, v59 +; GFX11-NEXT: s_waitcnt 
vmcnt(57) +; GFX11-NEXT: v_lshlrev_b32_e32 v56, 8, v60 +; GFX11-NEXT: s_waitcnt vmcnt(56) +; GFX11-NEXT: v_lshlrev_b32_e32 v60, 8, v61 +; GFX11-NEXT: s_waitcnt vmcnt(55) +; GFX11-NEXT: v_lshlrev_b32_e32 v61, 8, v62 +; GFX11-NEXT: s_waitcnt vmcnt(54) +; GFX11-NEXT: v_lshlrev_b32_e32 v63, 8, v63 +; GFX11-NEXT: s_waitcnt vmcnt(53) +; GFX11-NEXT: v_lshlrev_b32_e32 v62, 8, v72 +; GFX11-NEXT: s_waitcnt vmcnt(52) +; GFX11-NEXT: v_lshlrev_b32_e32 v73, 8, v73 +; GFX11-NEXT: s_waitcnt vmcnt(51) +; GFX11-NEXT: v_lshlrev_b32_e32 v72, 8, v74 +; GFX11-NEXT: s_waitcnt vmcnt(50) +; GFX11-NEXT: v_lshlrev_b32_e32 v75, 8, v75 +; GFX11-NEXT: s_waitcnt vmcnt(49) +; GFX11-NEXT: v_lshlrev_b32_e32 v74, 8, v76 +; GFX11-NEXT: s_waitcnt vmcnt(48) +; GFX11-NEXT: v_lshlrev_b32_e32 v77, 8, v77 +; GFX11-NEXT: s_waitcnt vmcnt(47) +; GFX11-NEXT: v_lshlrev_b32_e32 v76, 8, v78 +; GFX11-NEXT: s_waitcnt vmcnt(46) +; GFX11-NEXT: v_lshlrev_b32_e32 v78, 8, v79 +; GFX11-NEXT: s_waitcnt vmcnt(45) +; GFX11-NEXT: v_lshlrev_b32_e32 v79, 8, v88 +; GFX11-NEXT: s_waitcnt vmcnt(44) +; GFX11-NEXT: v_lshlrev_b32_e32 v89, 8, v89 +; GFX11-NEXT: s_waitcnt vmcnt(43) +; GFX11-NEXT: v_lshlrev_b32_e32 v88, 8, v90 +; GFX11-NEXT: s_waitcnt vmcnt(42) +; GFX11-NEXT: v_lshlrev_b32_e32 v91, 8, v91 +; GFX11-NEXT: s_waitcnt vmcnt(41) +; GFX11-NEXT: v_lshlrev_b32_e32 v90, 8, v92 +; GFX11-NEXT: s_waitcnt vmcnt(40) +; GFX11-NEXT: v_lshlrev_b32_e32 v92, 8, v93 +; GFX11-NEXT: s_waitcnt vmcnt(39) +; GFX11-NEXT: v_lshlrev_b32_e32 v93, 8, v94 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB97_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s29, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: v_and_b32_e64 v5, 0xffff, s5 +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; 
GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_pack_ll_b32_b16 s5, s5, s6 +; GFX11-NEXT: s_pack_ll_b32_b16 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_and_b32 s11, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: s_pack_ll_b32_b16 s7, s7, s8 +; GFX11-NEXT: s_pack_ll_b32_b16 s8, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v32 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v68 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v64 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v66 +; GFX11-NEXT: v_or_b32_e32 v6, v4, v67 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v65 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshl_or_b32 v4, v0, 16, v5 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v38 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v39 +; GFX11-NEXT: v_lshl_or_b32 v6, v6, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v49 +; GFX11-NEXT: v_lshl_or_b32 v5, v3, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v37 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v70 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v50 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v71 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v48 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v69 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v82 +; GFX11-NEXT: v_or_b32_e32 v9, v7, v80 +; 
GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_or_b32_e32 v10, v8, v81 +; GFX11-NEXT: v_lshl_or_b32 v7, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v53 +; GFX11-NEXT: v_lshl_or_b32 v8, v9, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v55 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v51 +; GFX11-NEXT: v_lshl_or_b32 v9, v10, 16, v3 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v84 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v52 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v54 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v86 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v83 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v96 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v85 +; GFX11-NEXT: v_or_b32_e32 v12, v10, v97 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v11, v87 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v99 +; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 +; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v103 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v114 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v98 +; GFX11-NEXT: v_lshl_or_b32 v12, v0, 16, v12 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v100 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v113 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v101 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v116 +; GFX11-NEXT: v_or_b32_e32 v17, v14, v128 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v112 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v117 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v102 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v130 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v133 +; GFX11-NEXT: v_or_b32_e32 v20, v14, v132 +; GFX11-NEXT: v_lshl_or_b32 v14, v0, 16, v3 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v148 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v119 +; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v129 +; GFX11-NEXT: 
v_or_b32_e32 v16, v16, v161 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v13 +; GFX11-NEXT: v_lshl_or_b32 v13, v2, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v166 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v144 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v134 +; GFX11-NEXT: v_or_b32_e32 v18, v18, v147 +; GFX11-NEXT: v_and_b32_e32 v22, 0xffff, v16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v167 +; GFX11-NEXT: v_and_b32_e32 v21, 0xffff, v15 +; GFX11-NEXT: v_lshl_or_b32 v15, v17, 16, v19 +; GFX11-NEXT: v_lshl_or_b32 v17, v18, 16, v22 +; GFX11-NEXT: v_mov_b32_e32 v2, s7 +; GFX11-NEXT: v_lshl_or_b32 v18, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v151 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v149 +; GFX11-NEXT: v_lshl_or_b32 v16, v20, 16, v21 +; GFX11-NEXT: v_mov_b32_e32 v3, s8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v180 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v177 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v19, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v165 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v162 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v42 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v41 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v20, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v179 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v115 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v45 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v44 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 
v21, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v131 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v118 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v59 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v56 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v22, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v145 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v135 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v60 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v61 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v23, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v150 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v146 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v63 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v62 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v24, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v163 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v160 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v73 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v72 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v25, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v176 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v164 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v75 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v74 +; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v26, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v181 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v178 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v77 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v76 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v27, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v183 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v182 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v78 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v79 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v28, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v43 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v40 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v89 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v88 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v29, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v47 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v46 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v91 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v90 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v30, v1, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v58 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v57 +; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or_b32_e32 v0, v0, v92 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v93 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v31, v1, 16, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB97_3 +; GFX11-NEXT: .LBB97_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: s_lshl_b32 s5, s29, 8 +; GFX11-NEXT: s_and_b32 s4, s28, 0xff +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s5, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s25, 8 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: s_and_b32 s6, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s27, 8 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_or_b32 s6, s7, s6 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s7, s8, s7 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s8, s9, s8 +; GFX11-NEXT: s_and_b32 s9, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_or_b32 s9, s10, s9 +; GFX11-NEXT: s_and_b32 s10, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s11, s19, 8 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s10, s11, s10 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s9, 0x300 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_addk_i32 s10, 0x300 +; GFX11-NEXT: 
s_pack_ll_b32_b16 s0, s0, s1 +; GFX11-NEXT: s_pack_ll_b32_b16 s1, s9, s10 +; GFX11-NEXT: s_waitcnt vmcnt(37) +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v58 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: s_addk_i32 s6, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v57 +; GFX11-NEXT: s_pack_ll_b32_b16 s3, s5, s6 +; GFX11-NEXT: s_waitcnt vmcnt(35) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v47 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: s_addk_i32 s7, 0x300 +; GFX11-NEXT: s_addk_i32 s8, 0x300 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v92, v0 +; GFX11-NEXT: s_pack_ll_b32_b16 s2, s7, s8 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v46 +; GFX11-NEXT: v_or_b32_e32 v1, v93, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v91, v2 +; GFX11-NEXT: s_waitcnt vmcnt(33) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v43 +; GFX11-NEXT: v_add_nc_u32_e32 v27, 0x300, v0 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v40 +; GFX11-NEXT: v_add_nc_u32_e32 v31, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v28, 0x300, v2 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v4 +; GFX11-NEXT: v_or_b32_e32 v3, v90, v3 +; GFX11-NEXT: s_waitcnt vmcnt(31) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v183 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v182 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v89, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v30, 0x300, v3 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: s_waitcnt vmcnt(29) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v181 +; GFX11-NEXT: v_or_b32_e32 v0, v88, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v181, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v78, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v79, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v29, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v178 +; GFX11-NEXT: v_add_nc_u32_e32 v182, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v178, 0x300, v2 +; 
GFX11-NEXT: v_or_b32_e32 v1, v77, v3 +; GFX11-NEXT: s_waitcnt vmcnt(27) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v176 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v164 +; GFX11-NEXT: s_waitcnt vmcnt(25) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v163 +; GFX11-NEXT: v_add_nc_u32_e32 v163, 0x300, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v76, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v160 +; GFX11-NEXT: v_or_b32_e32 v1, v75, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v160, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v74, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v73, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v1 +; GFX11-NEXT: s_waitcnt vmcnt(23) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v150 +; GFX11-NEXT: v_add_nc_u32_e32 v26, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v72, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v146 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: s_waitcnt vmcnt(21) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v145 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v135 +; GFX11-NEXT: v_add_nc_u32_e32 v25, 0x300, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v63, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: s_waitcnt vmcnt(19) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v131 +; GFX11-NEXT: v_or_b32_e32 v0, v62, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v131, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v60, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v61, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v24, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v118 +; GFX11-NEXT: v_add_nc_u32_e32 v135, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v118, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v59, v3 +; GFX11-NEXT: s_waitcnt vmcnt(17) +; GFX11-NEXT: 
v_add_nc_u32_e32 v2, 3, v179 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v115 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v165 +; GFX11-NEXT: v_add_nc_u32_e32 v115, 0x300, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v56, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v162 +; GFX11-NEXT: v_or_b32_e32 v1, v45, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v145, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v44, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v42, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v1 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v151 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v41, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v149 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v148 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v144 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v180, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v133 +; GFX11-NEXT: v_or_b32_e32 v0, v177, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v19, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v166, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v167, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v133, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v129 +; GFX11-NEXT: v_add_nc_u32_e32 v129, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v144, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v161, v3 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v119 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: 
v_add_nc_u32_e32 v3, 3, v117 +; GFX11-NEXT: s_waitcnt vmcnt(5) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v116 +; GFX11-NEXT: v_add_nc_u32_e32 v116, 0x300, v1 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v0, v147, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v114 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v99 +; GFX11-NEXT: v_add_nc_u32_e32 v114, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v0, v132, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v130, v3 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v103 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v98 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v54 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v39 +; GFX11-NEXT: v_add_nc_u32_e32 v33, 3, v33 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v4, v113, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v2, v128, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v100 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v4, v101, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v102, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v96 +; GFX11-NEXT: v_or_b32_e32 v1, v134, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v4, v97, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v55 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v52 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v87, v6 +; GFX11-NEXT: 
v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v51 +; GFX11-NEXT: v_or_b32_e32 v4, v86, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v85, v6 +; GFX11-NEXT: v_or_b32_e32 v6, v84, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v51, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v50, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v49 +; GFX11-NEXT: v_or_b32_e32 v5, v83, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v48 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v82, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v38 +; GFX11-NEXT: v_or_b32_e32 v5, v81, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v38, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v4, v71, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v80, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v37 +; GFX11-NEXT: v_add_nc_u32_e32 v37, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v39, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v70, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v36 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v34, 3, v35 +; GFX11-NEXT: v_add_nc_u32_e32 v35, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v7 +; GFX11-NEXT: v_or_b32_e32 v5, v69, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v34 +; GFX11-NEXT: v_or_b32_e32 v3, v112, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v68, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v34, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v67, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v66, v8 
+; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v33 +; GFX11-NEXT: v_add_nc_u32_e32 v33, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v32 +; GFX11-NEXT: v_add_nc_u32_e32 v32, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v65, v8 +; GFX11-NEXT: v_and_b32_e64 v8, 0xffff, s4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v36, 0xffff, v6 +; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_or_b32_e32 v4, v64, v4 +; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v14 +; GFX11-NEXT: v_lshl_or_b32 v10, v10, 16, v36 +; GFX11-NEXT: v_lshl_or_b32 v5, v7, 16, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v33 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v37 +; GFX11-NEXT: v_and_b32_e32 v36, 0xffff, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_lshl_or_b32 v6, v32, 16, v7 +; GFX11-NEXT: v_lshl_or_b32 v4, v4, 16, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v35 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v51 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v38 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v19 +; GFX11-NEXT: v_lshl_or_b32 v7, v34, 16, v8 +; GFX11-NEXT: v_lshl_or_b32 v8, v39, 16, v33 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v12 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v11 +; GFX11-NEXT: v_lshl_or_b32 v11, v50, 16, v32 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v1 +; GFX11-NEXT: v_lshl_or_b32 v9, v9, 16, v35 +; GFX11-NEXT: v_lshl_or_b32 v12, v15, 16, v14 +; GFX11-NEXT: v_lshl_or_b32 v13, v13, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v14, v3, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v16, v16, 16, v32 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v116 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v129 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v18 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v17 +; GFX11-NEXT: v_lshl_or_b32 v26, v26, 16, v36 +; 
GFX11-NEXT: v_lshl_or_b32 v17, v114, 16, v32 +; GFX11-NEXT: v_lshl_or_b32 v18, v144, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v20, v20, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v21, v21, 16, v35 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v115 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v135 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v131 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v23 +; GFX11-NEXT: v_and_b32_e32 v36, 0xffff, v27 +; GFX11-NEXT: v_lshl_or_b32 v22, v145, 16, v32 +; GFX11-NEXT: v_lshl_or_b32 v23, v118, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v24, v24, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v25, v25, 16, v35 +; GFX11-NEXT: v_and_b32_e32 v32, 0xffff, v163 +; GFX11-NEXT: v_and_b32_e32 v33, 0xffff, v182 +; GFX11-NEXT: v_and_b32_e32 v34, 0xffff, v181 +; GFX11-NEXT: v_and_b32_e32 v35, 0xffff, v28 +; GFX11-NEXT: v_lshl_or_b32 v15, v2, 16, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_lshl_or_b32 v19, v133, 16, v19 +; GFX11-NEXT: v_lshl_or_b32 v27, v160, 16, v32 +; GFX11-NEXT: v_lshl_or_b32 v28, v178, 16, v33 +; GFX11-NEXT: v_lshl_or_b32 v29, v29, 16, v34 +; GFX11-NEXT: v_lshl_or_b32 v30, v30, 16, v35 +; GFX11-NEXT: v_lshl_or_b32 v31, v31, 16, v36 +; GFX11-NEXT: .LBB97_3: ; %end +; GFX11-NEXT: s_clause 0x1e +; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:320 +; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:324 +; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:328 +; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:332 +; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:336 +; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:340 +; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:344 +; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:348 +; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:352 +; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:356 +; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:360 +; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:364 +; 
GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:368 +; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:372 +; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:376 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:380 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:384 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:388 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:392 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:396 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:400 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:404 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:408 +; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:412 +; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:416 +; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:420 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:424 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:428 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:432 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:436 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:440 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB97_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 +; GFX11-NEXT: s_branch .LBB97_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll index 52e125d0d658f..70f431d2c7cdf 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll @@ -6292,8 +6292,8 @@ define <8 x i32> @bitcast_v32i8_to_v8i32(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v19.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v17.l @@ -6322,8 +6322,8 @@ define <8 x i32> @bitcast_v32i8_to_v8i32(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.h, 8, v31.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.h, 8, v32.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v31 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB26_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -13313,8 +13313,8 @@ define <8 x float> @bitcast_v32i8_to_v8f32(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v19.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v17.l @@ -13343,8 +13343,8 @@ define <8 x float> @bitcast_v32i8_to_v8f32(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.h, 8, v31.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.h, 8, v32.l +; 
GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v31 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB50_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -19852,8 +19852,8 @@ define <4 x i64> @bitcast_v32i8_to_v4i64(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v19.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v17.l @@ -19882,8 +19882,8 @@ define <4 x i64> @bitcast_v32i8_to_v4i64(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.h, 8, v31.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.h, 8, v32.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v31 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB70_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -25881,8 +25881,8 @@ define <4 x double> @bitcast_v32i8_to_v4f64(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v19.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v17.l @@ -25911,8 +25911,8 @@ define <4 x double> 
@bitcast_v32i8_to_v4f64(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.h, 8, v31.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.h, 8, v32.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v31 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB86_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -31633,8 +31633,8 @@ define <16 x i16> @bitcast_v32i8_to_v16i16(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v17.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v13.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v11.l @@ -31663,8 +31663,8 @@ define <16 x i16> @bitcast_v32i8_to_v16i16(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v31.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v32.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v31 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB98_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -36535,8 +36535,8 @@ define <16 x half> @bitcast_v32i8_to_v16f16(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 
-; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v17.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v13.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v11.l @@ -36565,8 +36565,8 @@ define <16 x half> @bitcast_v32i8_to_v16f16(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v31.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v32.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v31 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB106_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -40373,8 +40373,8 @@ define <16 x bfloat> @bitcast_v32i8_to_v16bf16(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v17.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v13.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v11.l @@ -40403,8 +40403,8 @@ define <16 x bfloat> @bitcast_v32i8_to_v16bf16(<32 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v31.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v32.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v31 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, 
exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB110_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll index 436b1a038b274..bb4fd7b6f1e88 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll @@ -5019,21 +5019,22 @@ define <10 x i32> @bitcast_v40i8_to_v10i32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x9 -; GFX11-TRUE16-NEXT: scratch_load_b32 v36, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_b32 v35, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v31, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v33, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v25.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v23.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, 
v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v19.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v17.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v17.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v14.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v12.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v10.l @@ -5050,7 +5051,7 @@ define <10 x i32> @bitcast_v40i8_to_v10i32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v11.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v13.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v15.l, 8, v35.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v15.l, 8, v31.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v15.h, 8, v30.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v14.l, 8, v29.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v14.h, 8, v28.h @@ -5059,17 +5060,12 @@ define <10 x i32> @bitcast_v40i8_to_v10i32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v12.l, 8, v29.l ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v12.h, 8, v33.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v11.l, 8, v33.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v11.h, 8, v34.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.l, 8, v34.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.h, 8, v35.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v36 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v12.h, 8, v36.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v11.l, 8, v37.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v11.h, 8, v38.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.l, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.h, 8, v48.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v35 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; 
GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB14_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -5119,14 +5115,14 @@ define <10 x i32> @bitcast_v40i8_to_v10i32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v34.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v33.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v27, v4 ; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v5.l, v14.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v27.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v12.h ; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v11.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v31.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v32.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v31.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v27, v5 ; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v6.l, v13.l @@ -5147,10 +5143,10 @@ define <10 x i32> @bitcast_v40i8_to_v10i32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr24_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr25_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr23_lo16 @@ -5265,14 +5261,14 @@ define <10 x i32> @bitcast_v40i8_to_v10i32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v12.l, v6.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v12.h, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) 
-; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v34.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v25, v9 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v31.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v32.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v31.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v25.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v11.l, v7.l @@ -11979,21 +11975,22 @@ define <10 x float> @bitcast_v40i8_to_v10f32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x9 -; GFX11-TRUE16-NEXT: scratch_load_b32 v36, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_b32 v35, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v31, off, s32 
offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v32, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v33, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v25.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v23.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v19.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v17.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v17.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v14.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v12.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v10.l @@ -12010,7 +12007,7 @@ define <10 x float> @bitcast_v40i8_to_v10f32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v11.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v13.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v15.l, 8, v35.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v15.l, 8, v31.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v15.h, 8, v30.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v14.l, 8, v29.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v14.h, 8, v28.h @@ -12019,17 +12016,12 @@ define <10 x float> @bitcast_v40i8_to_v10f32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v12.l, 8, v29.l ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v12.h, 8, v33.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v11.l, 8, v33.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v11.h, 8, v34.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.l, 8, v34.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.h, 8, v35.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v36 +; GFX11-TRUE16-NEXT: 
v_lshlrev_b16 v12.h, 8, v36.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v11.l, 8, v37.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v11.h, 8, v38.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.l, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.h, 8, v48.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v35 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB34_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -12079,14 +12071,14 @@ define <10 x float> @bitcast_v40i8_to_v10f32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v34.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v33.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v27, v4 ; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v5.l, v14.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v27.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v12.h ; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v11.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v31.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v32.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v31.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v27, v5 ; GFX11-TRUE16-NEXT: v_or_b16 v27.l, v6.l, v13.l @@ -12107,10 +12099,10 @@ define <10 x float> @bitcast_v40i8_to_v10f32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34 ; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr24_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr25_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr23_lo16 @@ -12225,14 +12217,14 @@ define <10 x float> @bitcast_v40i8_to_v10f32(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v12.l, v6.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v12.h, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v34.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v25, v9 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v25.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v31.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v32.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v31.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v25.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v11.l, v7.l @@ -18549,17 +18541,17 @@ define <20 x i16> @bitcast_v40i8_to_v20i16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x9 -; GFX11-TRUE16-NEXT: scratch_load_b32 v37, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v29.l +; 
GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v33, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v29.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v27.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v25.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v23.l @@ -18592,21 +18584,22 @@ define <20 x i16> @bitcast_v40i8_to_v20i16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.l, 8, v29.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v29.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v30.h, 8, v30.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v28.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v35.h -; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.l, 8, v28.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v28.h, 8, v33.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v31.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v28.h, 8, v33.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v34.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v38.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: 
v_lshlrev_b16 v34.l, 8, v34.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v39.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v35.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v48.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v36.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v37 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v49.l +; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB50_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -18634,10 +18627,10 @@ define <20 x i16> @bitcast_v40i8_to_v20i16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v31.l -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v31.h +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v37.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v35.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v36.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v34.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v18.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v18.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v19.h @@ -18651,13 +18644,13 @@ define <20 x i16> @bitcast_v40i8_to_v20i16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v29.l ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v29.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v30.h -; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v33.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v35.h +; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v31.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v33.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v28.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v33.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, 
v8.h, v34.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v34.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v35.l +; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v32.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v32.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v33.l ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_hi16 @@ -18674,10 +18667,10 @@ define <20 x i16> @bitcast_v40i8_to_v20i16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_hi16 @@ -18691,33 +18684,33 @@ define <20 x i16> @bitcast_v40i8_to_v20i16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB50_2 ; GFX11-TRUE16-NEXT: .LBB50_4: ; %cmp.true ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v31.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v36.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v34.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v35.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v28.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v34.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v35.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v33.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v34.l, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v35.h, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v32.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v33.l, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v31.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v32.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v33.h, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, 0x300, v1.l @@ -18734,7 +18727,7 @@ define <20 x i16> @bitcast_v40i8_to_v20i16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v23.l, 3 ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v28.h, v0.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v30.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v33.l, v1.l +; 
GFX11-TRUE16-NEXT: v_or_b16 v1.l, v31.l, v1.l ; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v29.l, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v0.l @@ -24630,17 +24623,17 @@ define <20 x half> @bitcast_v40i8_to_v20f16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x9 -; GFX11-TRUE16-NEXT: scratch_load_b32 v37, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v29.l +; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v33, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v29.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v27.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v25.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v23.l @@ -24673,21 +24666,22 @@ define <20 x half> 
@bitcast_v40i8_to_v20f16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.l, 8, v29.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v29.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v30.h, 8, v30.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v28.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v35.h -; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.l, 8, v28.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v28.h, 8, v33.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v31.h +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v28.h, 8, v33.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v34.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v38.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v34.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v39.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v35.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v48.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v36.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v37 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v49.l +; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB62_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -24715,10 +24709,10 @@ define <20 x half> @bitcast_v40i8_to_v20f16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v31.l -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v31.h +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v37.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v35.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v36.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v34.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v18.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v18.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v19.h @@ -24732,13 +24726,13 @@ define <20 x half> @bitcast_v40i8_to_v20f16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v29.l ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v29.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v30.h -; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v33.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v35.h +; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v31.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v33.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v28.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v33.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v34.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v34.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v35.l +; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v32.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v32.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v33.l ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_hi16 @@ -24755,10 +24749,10 @@ define <20 x half> @bitcast_v40i8_to_v20f16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_hi16 @@ -24772,33 +24766,33 @@ define <20 x half> @bitcast_v40i8_to_v20f16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB62_2 ; GFX11-TRUE16-NEXT: .LBB62_4: ; %cmp.true ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v31.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v36.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v34.l, 3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v35.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v28.l, 3 ; GFX11-TRUE16-NEXT: 
v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v34.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v35.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v33.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v34.l, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v35.h, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v32.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v33.l, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v31.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v32.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v33.h, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, 0x300, v1.l @@ -24815,7 +24809,7 @@ define <20 x half> @bitcast_v40i8_to_v20f16(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v23.l, 3 ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v28.h, v0.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v30.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v33.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v31.l, v1.l ; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v29.l, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v0.l @@ -28750,20 +28744,24 @@ define <5 x double> @bitcast_v40i8_to_v5f64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x9 -; GFX11-TRUE16-NEXT: scratch_load_b32 v49, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 
v31, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v29.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v27.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v25.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v23.l +; GFX11-TRUE16-NEXT: scratch_load_b32 v38, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v51, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:4 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v29.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v27.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v25.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v23.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v18.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v16.l @@ -28773,12 +28771,12 @@ define <5 x double> @bitcast_v40i8_to_v5f64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v8.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v6.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v3.l -; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v7.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v7.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v28.h, 8, v9.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.l, 8, v11.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v13.l @@ -28786,23 +28784,18 @@ define <5 x double> @bitcast_v40i8_to_v5f64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v17.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, v19.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v21.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v48.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v39.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v39.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v38.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v37.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v36.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v35.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v34.h ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v36.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v36.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v37.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v37.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v38.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v49 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 
v17.l, 8, v48.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v49.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v50.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v51.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v38 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB72_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -28810,23 +28803,22 @@ define <5 x double> @bitcast_v40i8_to_v5f64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB72_4 ; GFX11-TRUE16-NEXT: .LBB72_2: ; %end ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB72_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v35.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v33.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v30.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v29.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v0.l, v34.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v34.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v0.l, v32.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v10.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v33.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v31.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v27.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v10, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v1.h, v33.l +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v1.h, v31.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v21.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v20.h ; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v29.l @@ -28851,21 +28843,20 @@ define <5 x double> @bitcast_v40i8_to_v5f64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v10.h ; GFX11-TRUE16-NEXT: 
v_or_b16 v6.h, v6.h, v19.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v37.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v36.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v10, v4 ; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v5.l, v21.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v10.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v18.h ; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v17.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v31.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v31.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v35.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v34.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v10, v5 ; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v6.l, v19.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v10.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr27_hi16 @@ -28880,14 +28871,14 @@ define <5 x double> @bitcast_v40i8_to_v5f64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr25_hi16 @@ -28921,8 +28912,8 @@ define <5 x double> @bitcast_v40i8_to_v5f64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB72_2 ; GFX11-TRUE16-NEXT: .LBB72_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v35.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v30.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v29.h, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, 0 @@ -28931,10 +28922,10 @@ define <5 x double> @bitcast_v40i8_to_v5f64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v34.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v34.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v33.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v33.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v32.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v32.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v31.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v31.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v27.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h @@ -28997,16 +28988,15 @@ define <5 x double> @bitcast_v40i8_to_v5f64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v10.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v18.l, v6.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v18.h, v6.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v36.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v10, v9 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v31.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v35.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v34.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v10.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v17.l, v7.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v17.h, v7.h @@ -32861,20 +32851,24 @@ define <5 x i64> @bitcast_v40i8_to_v5i64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x9 -; GFX11-TRUE16-NEXT: scratch_load_b32 v49, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v29.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v27.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v25.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v23.l +; GFX11-TRUE16-NEXT: scratch_load_b32 v38, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 +; GFX11-TRUE16-NEXT: 
scratch_load_u16 v48, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v51, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:4 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v29.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v27.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v25.l +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v23.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v18.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v16.l @@ -32884,12 +32878,12 @@ define <5 x i64> @bitcast_v40i8_to_v5i64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v8.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v6.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v7.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v7.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v28.h, 8, v9.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.l, 8, v11.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v13.l @@ -32897,23 +32891,18 @@ 
define <5 x i64> @bitcast_v40i8_to_v5i64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v17.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, v19.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v21.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v48.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v39.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v39.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v38.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v37.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v36.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v35.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v34.h ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v36.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v36.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v37.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v37.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v38.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v49 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v48.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v49.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v50.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v51.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v38 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB76_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -32921,23 +32910,22 @@ define <5 x i64> @bitcast_v40i8_to_v5i64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB76_4 ; GFX11-TRUE16-NEXT: .LBB76_2: ; %end ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB76_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v35.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v33.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v30.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v29.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v0.l, v34.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v34.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v0.l, v32.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v10.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v33.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v31.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v27.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v10, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v1.h, v33.l +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v1.h, v31.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v21.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v20.h ; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v29.l @@ -32962,21 +32950,20 @@ define <5 x i64> @bitcast_v40i8_to_v5i64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v10.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v19.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v37.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v36.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v10, v4 ; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v5.l, v21.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v10.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v18.h ; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v17.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 
0xff, v31.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v31.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v35.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v34.l ; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v10, v5 ; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v6.l, v19.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v10.h -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr27_hi16 @@ -32991,14 +32978,14 @@ define <5 x i64> @bitcast_v40i8_to_v5i64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr25_hi16 @@ -33032,8 +33019,8 @@ define <5 x i64> @bitcast_v40i8_to_v5i64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB76_2 ; GFX11-TRUE16-NEXT: .LBB76_4: ; %cmp.true -; GFX11-TRUE16-NEXT: 
v_add_nc_u16 v0.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v35.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v30.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v29.h, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, 0 @@ -33042,10 +33029,10 @@ define <5 x i64> @bitcast_v40i8_to_v5i64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v34.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v34.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v33.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v33.l, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v32.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v32.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v31.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v31.l, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v27.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h @@ -33108,16 +33095,15 @@ define <5 x i64> @bitcast_v40i8_to_v5i64(<40 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v10.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v18.l, v6.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v18.h, v6.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v32.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v36.l, 3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v10, v9 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v31.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v35.l, 3 +; GFX11-TRUE16-NEXT: 
v_add_nc_u16 v8.h, v34.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v10.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v17.l, v7.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v17.h, v7.h diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll index ede44e738fe00..ffa77f3c92a1c 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll @@ -12457,107 +12457,107 @@ define <16 x i32> @bitcast_v64i8_to_v16i32(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v65, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v81, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, 
off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v51, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v99, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v100, off, s32 offset:40 +; GFX11-TRUE16-NEXT: 
scratch_load_u16 v101, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v102, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v103, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, v29.l +; GFX11-TRUE16-NEXT: scratch_load_u16 v70, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:4 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v29.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v27.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v22.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v20.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v18.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.h, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v1.l -; GFX11-TRUE16-NEXT: 
v_lshlrev_b16 v55.l, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v19.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v19.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v21.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v30.h, 8, v23.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v27.l, 8, v25.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v27.h, 8, v27.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.l, 8, v80.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.l, 8, v48.h ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v64.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v80.l ; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v64.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v81.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v65.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v82.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v65.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v83.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v66.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v84.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v66.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v85.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v67.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v87.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v67.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v96.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, v68.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, v97.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.l, 8, v68.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.l, 8, v98.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v69.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v99.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v69.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v100.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v70.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v101.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.l, 8, v70.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.l, 8, v102.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.h, 8, v71.l +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.h, 8, v103.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v71.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v112.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v80.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v81 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v113.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v86 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB26_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -12568,95 +12568,95 @@ define <16 x i32> @bitcast_v64i8_to_v16i32(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB26_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v53.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.h, 0 -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v52.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v52.l -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v0.l, v54.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v55.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v64.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v54.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v64, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v1.h, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v37.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, 0 +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v36.l +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v0.l, v38.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v39.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v80.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v38.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 
0xff, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v33.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v80, v0 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v1.h, v37.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v29.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v28.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v51.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v35.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v26.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v64, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v2.l, v51.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v48.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v80, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v2.l, v35.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v31.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v32.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v24.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v64, v2 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v3.l, v50.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v50.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v80, v2 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v3.l, v34.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v34.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v30.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v28.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v64, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v4.l, v39.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v80, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v4.l, v31.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 
0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v5.l, v29.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v70.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v80, v4 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v5.l, v29.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v25.h ; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v23.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v64, v5 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v6.l, v27.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v69.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v68.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v80, v5 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v6.l, v27.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v67.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v22.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v64, v6 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v7.l, v25.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v66.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v65.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v80, v6 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v7.l, v25.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v21.h -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v7, v64, v7 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v8.l, v23.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v64.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v55.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v54.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v80, v7 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v8.l, v23.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v20.h ; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v19.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v64, v8 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v9.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v52.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v80, v8 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v9.l, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v51.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v18.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v31.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v64, v9 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v10.l, v21.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v50.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v49.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v80, v9 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v10.l, v21.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v17.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr49_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v48.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_hi16 @@ -12665,32 +12665,32 @@ define <16 x i32> @bitcast_v64i8_to_v16i32(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 ; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr27_lo16 @@ -12708,199 +12708,199 @@ define <16 x i32> @bitcast_v64i8_to_v16i32(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v64, v10 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v11.l, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v80, v10 +; 
GFX11-TRUE16-NEXT: v_or_b16 v80.l, v11.l, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v16.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr20_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v12.l, v19.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v80, v11 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v12.l, v19.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v64, v12 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v13.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v80, v12 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v13.l, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v64, v13 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v14.l, v17.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v80, v13 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v14.l, v17.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v64, v14 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v15.l, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v80, v14 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v15.l, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v64, v15 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v80, v15 ; GFX11-TRUE16-NEXT: 
s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB26_2 ; GFX11-TRUE16-NEXT: .LBB26_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v52.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v52.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, 0 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v39.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v36.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v54.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v55.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v54.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.l, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v0.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v38.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v39.l, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v38.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v37.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v52, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v36, v3 
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v39.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v51.l, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v35.l, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v51.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v35.h, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v52, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v3.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v52.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v36, v4 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v3.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v36.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v3.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v50.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v50.h, v2.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v34.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v34.h, v2.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v29.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v28.h, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v52, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v3.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v36, v5 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v4.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v26.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v24.h, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v39.h, v3.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v48.l, v3.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v31.h, v3.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v32.l, v3.h ; GFX11-TRUE16-NEXT: 
v_and_b16 v4.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v4.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v52, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v5.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v36, v6 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v29.h, v4.l ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v30.h, v4.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v24.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v26.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v52, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v5.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v36, v7 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v6.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v28.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v30.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v27.l, v5.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v27.h, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v6.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v52, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v7.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v36, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v25.l, v6.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v25.h, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v38.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v52, v9 -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v7.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v71.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v70.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v36, v9 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v37.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v37.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v69.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v68.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v23.l, v7.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v23.h, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v52, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v9.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v36, v10 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v22.l, v8.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v22.h, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v36.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v52, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v9.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v66.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v36, v11 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 
v35.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v64.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v21.l, v9.l ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v21.h, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v52, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v11.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v36, v12 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v11.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v20.l, v10.l ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v20.h, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v34.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v52, v13 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v11.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v55.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v54.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v36, v13 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v33.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v53.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v52.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v19.l, v11.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v19.h, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v52, v14 -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v13.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v36, v14 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v13.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v18.l, v12.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v18.h, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v52, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v13.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v51.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v50.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v36, v15 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v31.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v31.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v17.l, v13.l ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v17.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v52, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v15.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v36, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v15.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v16.l, v14.l ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v16.h, v14.h ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | 
instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v52, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v15.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v36, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v15.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v52, v15 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v36, v15 ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -14680,775 +14680,390 @@ define inreg <16 x i32> @bitcast_v64i8_to_v16i32_scalar(<64 x i8> inreg %a, i32 ; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX9-NEXT: s_branch .LBB27_2 ; -; GFX11-TRUE16-LABEL: bitcast_v64i8_to_v16i32_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 
offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v70, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v80, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v82, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v67, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v68, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v69, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v54, 8, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 8, v6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 8, v8 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 8, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; 
GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 8, v14 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 8, v86 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB27_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s17, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s16, 0xff -; GFX11-TRUE16-NEXT: s_and_b32 s8, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s6, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s7, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s21, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s9, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s27, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v31 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v32 -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s9, 16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v38 -; 
GFX11-TRUE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v83 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v85 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v6, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v68 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v69 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s10, 0xffff -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s10, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v71 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v80 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v81 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v6, v65 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v7, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v9, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v28 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v51 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v50 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v49 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v86, 0xff, v39 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v55 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v12, v19 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v21 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v86, v86, v25 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v96, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v86, 16, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v87 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v96, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v86 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB27_3 -; GFX11-TRUE16-NEXT: .LBB27_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s3, 
s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v31 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v32 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v38 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v33 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 3, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v83, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v84, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v85, v2 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v82, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v68, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v69, v11 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; 
GFX11-TRUE16-NEXT: s_or_b32 s5, s7, s6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v52 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v70, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v71, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v80, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v65, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v66, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v67, v9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v26 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v28 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v51 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 3, v49 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 3, v48 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 3, v39 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v27, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v29, v1 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v54, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v55, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v17, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v19, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v21, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v23, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v25, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v18, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB27_3: ; %end -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB27_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_branch 
.LBB27_2 -; -; GFX11-FAKE16-LABEL: bitcast_v64i8_to_v16i32_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v39, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v86, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v48, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v49, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v50, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v51, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v52, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v53, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v80, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v81, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v65, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v19 -; 
GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v68, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v69, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 8, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 8, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v55, 8, v6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v64, 8, v8 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 8, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 8, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 8, v14 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 8, v86 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB27_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s17, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s16, 0xff -; GFX11-FAKE16-NEXT: s_and_b32 s8, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s6, 0xffff -; 
GFX11-FAKE16-NEXT: s_lshl_b32 s7, s7, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s21, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s9, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s27, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v31 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v32 -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s9, 16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v38 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v83 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v85 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v6, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v68 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v69 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s10, 0xffff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s10, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, 
v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v71 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v80 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v81 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v6, v65 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v7, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v9, v67 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v28 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v51 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v49 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v48 -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v86, 0xff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v55 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v12, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v86, v86, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v96, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v87 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v96, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v86 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB27_3 -; GFX11-FAKE16-NEXT: .LBB27_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; 
GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v31 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v32 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v38 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, 
v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 3, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v83, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v84, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v85, v2 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v82, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v68, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v69, v11 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s7, s6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v52 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v70, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v71, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v80, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v65, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v66, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v67, v9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v28 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v51 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 3, v49 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 3, v48 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 3, v39 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v27, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v29, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v54, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v55, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v17, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v19, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v21, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v23, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v25, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: 
v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v18, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB27_3: ; %end -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB27_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: s_branch .LBB27_2 +; GFX11-LABEL: bitcast_v64i8_to_v16i32_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 +; GFX11-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 +; GFX11-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 +; GFX11-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v0, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v39, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v86, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v48, 
off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v49, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v50, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v51, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v52, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v53, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v80, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v82, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v65, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v67, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v68, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v69, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v29, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 8, v0 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX11-NEXT: s_waitcnt vmcnt(12) +; GFX11-NEXT: v_lshlrev_b32_e32 v54, 8, v4 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_lshlrev_b32_e32 v55, 8, v6 +; GFX11-NEXT: s_waitcnt vmcnt(10) +; GFX11-NEXT: v_lshlrev_b32_e32 v64, 8, v8 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 8, v10 +; GFX11-NEXT: s_waitcnt vmcnt(8) +; GFX11-NEXT: v_lshlrev_b32_e32 v19, 8, v12 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_lshlrev_b32_e32 v21, 8, v14 +; GFX11-NEXT: s_waitcnt vmcnt(6) +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 8, v86 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB27_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: 
s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: s_lshl_b32 s6, s6, 16 +; GFX11-NEXT: s_lshl_b32 s7, s17, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_and_b32 s6, s16, 0xff +; GFX11-NEXT: s_and_b32 s8, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s19, 8 +; GFX11-NEXT: s_or_b32 s6, s6, s7 +; GFX11-NEXT: s_or_b32 s7, s8, s9 +; GFX11-NEXT: s_and_b32 s6, s6, 0xffff +; GFX11-NEXT: s_lshl_b32 s7, s7, 16 +; GFX11-NEXT: s_and_b32 s8, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s21, 8 +; GFX11-NEXT: s_or_b32 s6, s6, s7 +; GFX11-NEXT: s_or_b32 s7, s8, s9 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_or_b32 s8, s8, s9 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_and_b32 s8, s9, 0xffff +; GFX11-NEXT: s_and_b32 s9, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s27, 8 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v31 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v32 +; GFX11-NEXT: s_lshl_b32 s9, s9, 16 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v38 +; GFX11-NEXT: s_or_b32 s8, s8, s9 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v83 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v84 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v24 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v85 +; GFX11-NEXT: s_and_b32 s11, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s12, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v82 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v68 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v69 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s10, s10, 
0xffff +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_or_b32_e32 v4, s10, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v37 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v16 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v18 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v20 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v70 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v71 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v80 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v81 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v65 +; GFX11-NEXT: v_or_b32_e32 v7, v7, v66 +; GFX11-NEXT: v_or_b32_e32 v9, v9, v67 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v52 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v6 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v64 +; GFX11-NEXT: v_or_b32_e32 v6, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v7, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v8, v8, v12 +; GFX11-NEXT: v_or_b32_e32 v9, v13, v9 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v28 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v53 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v11 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v51 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v50 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v49 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v48 +; GFX11-NEXT: v_and_b32_e32 v86, 0xff, v39 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v27 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v29 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v54 +; 
GFX11-NEXT: v_or_b32_e32 v3, v3, v55 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v17 +; GFX11-NEXT: v_or_b32_e32 v12, v12, v19 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v21 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v23 +; GFX11-NEXT: v_or_b32_e32 v86, v86, v25 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v96, 0xffff, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v86, 16, v86 +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v87 +; GFX11-NEXT: v_or_b32_e32 v14, v96, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v86 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB27_3 +; GFX11-NEXT: .LBB27_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s0, s0, s1 +; GFX11-NEXT: s_and_b32 s1, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s2, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_or_b32 s1, s2, s1 +; GFX11-NEXT: s_and_b32 s2, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s19, 8 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_or_b32 s2, s3, s2 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; 
GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_and_b32 s3, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s1, s1, s2 +; GFX11-NEXT: s_or_b32 s2, s4, s3 +; GFX11-NEXT: s_and_b32 s3, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s23, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s3, s4, s3 +; GFX11-NEXT: s_and_b32 s4, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s25, 8 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_addk_i32 s3, 0x300 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s2, s2, 0xffff +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s2, s2, s3 +; GFX11-NEXT: s_and_b32 s3, s4, 0xffff +; GFX11-NEXT: s_and_b32 s4, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s27, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v31 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v32 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v38 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v33 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v24 +; GFX11-NEXT: v_or_b32_e32 v0, v83, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v84, v1 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_or_b32_e32 v2, v85, v2 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v6, v82, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v10, v68, v10 +; GFX11-NEXT: v_or_b32_e32 v11, v69, v11 +; GFX11-NEXT: s_and_b32 s6, 
s28, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: s_or_b32 s5, s7, s6 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v35 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v37 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v20 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v52 +; GFX11-NEXT: v_or_b32_e32 v0, v70, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v71, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v80, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v81, v3 +; GFX11-NEXT: v_or_b32_e32 v6, v65, v6 +; GFX11-NEXT: v_or_b32_e32 v7, v66, v7 +; GFX11-NEXT: v_or_b32_e32 v9, v67, v9 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 
0x300, v9 +; GFX11-NEXT: v_or_b32_e32 v11, v64, v11 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v6 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_or_b32_e32 v6, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v7, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v8, v8, v12 +; GFX11-NEXT: v_or_b32_e32 v9, v13, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v26 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v28 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v51 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 3, v49 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v48 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v39 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX11-NEXT: v_or_b32_e32 v0, v27, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v29, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v54, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v55, v3 +; GFX11-NEXT: v_or_b32_e32 v11, v17, v11 +; GFX11-NEXT: v_or_b32_e32 v12, v19, v12 +; GFX11-NEXT: v_or_b32_e32 v14, v21, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v23, v15 +; GFX11-NEXT: v_or_b32_e32 v16, v25, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: 
v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v17 +; GFX11-NEXT: v_or_b32_e32 v14, v18, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v16 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB27_3: ; %end +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB27_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: s_branch .LBB27_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -27342,107 +26957,107 @@ define <16 x float> @bitcast_v64i8_to_v16f32(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:108 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_hi_b16 v65, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v81, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 
offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v51, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v99, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v100, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v101, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v102, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v103, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, v29.l +; 
GFX11-TRUE16-NEXT: scratch_load_u16 v70, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:4 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v29.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v27.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v22.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v20.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v18.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.h, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v19.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v9.l +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v19.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v21.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v30.h, 8, v23.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v27.l, 8, v25.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v27.h, 8, v27.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.l, 8, v80.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.l, 8, v48.h ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v64.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v80.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v64.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v81.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v65.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v82.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v65.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v83.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v66.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v84.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v66.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v85.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v67.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v87.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v67.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v96.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, 
v68.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, v97.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.l, 8, v68.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.l, 8, v98.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v69.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v99.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v69.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v100.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v70.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v101.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.l, 8, v70.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.l, 8, v102.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.h, 8, v71.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.h, 8, v103.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v71.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v112.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v80.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v81 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v113.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v86 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB50_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -27453,95 +27068,95 @@ define <16 x float> @bitcast_v64i8_to_v16f32(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB50_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v53.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.h, 0 -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v52.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v52.l -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v0.l, v54.h -; GFX11-TRUE16-NEXT: v_or_b16 
v0.h, v0.h, v55.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v64.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v54.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v64, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v1.h, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v37.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, 0 +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v36.l +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v0.l, v38.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v39.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v80.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v38.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v33.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v80, v0 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v1.h, v37.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v29.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v28.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v51.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v35.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v26.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v64, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v2.l, v51.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v48.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v80, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v2.l, v35.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v31.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v32.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v24.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v64, v2 -; GFX11-TRUE16-NEXT: 
v_or_b16 v64.l, v3.l, v50.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v50.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v80, v2 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v3.l, v34.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v34.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v30.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v28.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v64, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v4.l, v39.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v80, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v4.l, v31.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v5.l, v29.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v70.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v80, v4 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v5.l, v29.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v25.h ; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v23.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v64, v5 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v6.l, v27.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v69.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v68.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v80, v5 +; GFX11-TRUE16-NEXT: 
v_or_b16 v80.l, v6.l, v27.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v67.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v22.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v64, v6 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v7.l, v25.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v66.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v65.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v80, v6 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v7.l, v25.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v21.h -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v64, v7 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v8.l, v23.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v64.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v55.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v54.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v80, v7 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v8.l, v23.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v20.h ; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v19.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v64, v8 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v9.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v52.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v80, v8 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v9.l, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 
0xff, v51.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v18.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v31.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v64, v9 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v10.l, v21.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v50.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v49.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v80, v9 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v10.l, v21.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v17.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v48.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_hi16 @@ -27550,32 +27165,32 @@ define <16 x float> @bitcast_v64i8_to_v16f32(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr49 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr27_lo16 @@ -27593,199 +27208,199 @@ define <16 x float> @bitcast_v64i8_to_v16f32(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v64, v10 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v11.l, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v80, v10 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v11.l, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v16.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr20_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v12.l, v19.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v80, v11 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v12.l, v19.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v64, v12 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v13.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v80, v12 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v13.l, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v64, v13 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v14.l, v17.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v80, v13 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v14.l, v17.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v64, v14 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v15.l, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v80, v14 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v15.l, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v64, v15 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v80, v15 ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB50_2 ; GFX11-TRUE16-NEXT: .LBB50_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v52.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v52.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, 0 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v39.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v36.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v54.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v55.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v54.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.l, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, 
v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v0.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v38.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v39.l, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v38.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v37.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v52, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v36, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v39.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v51.l, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v35.l, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v51.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v35.h, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v52, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v3.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v52.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v36, v4 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v3.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v36.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v3.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v50.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v50.h, v2.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v34.l, v2.l 
+; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v34.h, v2.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v29.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v28.h, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v52, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v3.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v36, v5 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v4.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v26.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v24.h, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v39.h, v3.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v48.l, v3.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v31.h, v3.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v32.l, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v4.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v52, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v5.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v36, v6 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v29.h, v4.l ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v30.h, v4.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v24.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v26.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v52, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v5.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v36, v7 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v6.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v28.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v30.l, 3 -; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v8.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v27.l, v5.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v27.h, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v6.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v52, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v7.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v36, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v25.l, v6.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v25.h, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v38.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v52, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v7.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v71.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v70.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v36, v9 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v37.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v37.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v69.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v68.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v23.l, v7.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v23.h, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v52, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v9.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v36, v10 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v9.l ; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v22.l, v8.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v22.h, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v36.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v52, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v9.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v66.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v36, v11 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v35.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v64.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v21.l, v9.l ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v21.h, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v52, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v11.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v36, v12 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v11.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v20.l, v10.l ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v20.h, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v34.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v52, v13 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v11.l +; GFX11-TRUE16-NEXT: 
v_add_nc_u16 v12.l, v55.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v54.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v36, v13 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v33.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v53.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v52.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v19.l, v11.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v19.h, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v52, v14 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v13.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v36, v14 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v13.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v18.l, v12.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v18.h, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v52, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v13.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v51.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v50.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v36, v15 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v31.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v31.l, 3 -; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v18.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v17.l, v13.l ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v17.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v52, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v15.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v36, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v15.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v16.l, v14.l ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v16.h, v14.h ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v52, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v15.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v36, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v15.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v52, v15 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v36, v15 ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -29565,775 +29180,390 @@ define inreg <16 x float> @bitcast_v64i8_to_v16f32_scalar(<64 x i8> inreg %a, i3 ; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX9-NEXT: s_branch .LBB51_2 ; -; GFX11-TRUE16-LABEL: bitcast_v64i8_to_v16f32_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v70, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v80, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v82, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v67, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v68, 8, v23 -; 
GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v69, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v54, 8, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 8, v6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 8, v8 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 8, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 8, v14 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 8, v86 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB51_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s17, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s16, 0xff -; GFX11-TRUE16-NEXT: s_and_b32 s8, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s6, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s7, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 
s9, s21, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s9, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s27, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v31 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v32 -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s9, 16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v38 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v83 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v85 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v6, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v68 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v69 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s10, 0xffff -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s10, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v37 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v71 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v80 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v81 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v6, v65 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v7, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v9, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v28 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v51 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v50 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v49 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v86, 0xff, v39 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 
v1, v1, v29 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v55 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v12, v19 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v21 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v86, v86, v25 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v96, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v86, 16, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v87 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v96, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v86 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB51_3 -; GFX11-TRUE16-NEXT: .LBB51_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s16, 0xff 
-; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v31 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v32 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v38 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v33 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 3, v22 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v11, 3, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v83, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v84, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v85, v2 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v82, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v68, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v69, v11 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s7, s6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, 
v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v52 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v70, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v71, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v80, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v65, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v66, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v67, v9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v26 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v28 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v53 -; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v51 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 3, v49 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 3, v48 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 3, v39 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v27, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v29, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v54, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v55, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v17, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v19, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v21, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v23, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v25, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xffff, v12 -; 
GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v18, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB51_3: ; %end -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB51_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_branch .LBB51_2 -; -; GFX11-FAKE16-LABEL: bitcast_v64i8_to_v16f32_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v39, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v86, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v48, 
off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v49, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v50, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v51, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v52, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v53, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v80, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v81, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v65, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v68, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v69, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 8, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 8, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v55, 8, v6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v64, 8, v8 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 8, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 8, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 8, v14 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 8, v86 -; 
GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB51_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s17, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s16, 0xff -; GFX11-FAKE16-NEXT: s_and_b32 s8, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s6, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s7, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s21, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s9, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s27, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v31 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v32 -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s9, 16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v38 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v83 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v1, v1, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v85 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v6, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v68 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v69 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s10, 0xffff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s10, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v71 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v80 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v81 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v6, v65 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v7, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v9, v67 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v28 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v51 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v49 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v86, 0xff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v55 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v12, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v86, v86, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v96, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v87 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v96, v14 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v15, v15, v86 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB51_3 -; GFX11-FAKE16-NEXT: .LBB51_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: 
s_addk_i32 s3, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v31 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v32 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v38 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 3, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v83, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v84, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v85, v2 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v82, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v68, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v69, v11 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s7, s6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v52 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v70, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v71, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v80, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v65, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v66, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v67, v9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 
v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v51 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 3, v49 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 3, v48 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 3, v39 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v27, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v29, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v54, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v55, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v17, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v19, 
v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v21, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v23, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v25, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v18, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB51_3: ; %end -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB51_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: s_branch .LBB51_2 +; GFX11-LABEL: bitcast_v64i8_to_v16f32_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v38, v14 :: 
v_dual_mov_b32 v37, v12 +; GFX11-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 +; GFX11-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 +; GFX11-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v0, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v39, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v86, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v48, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v49, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v50, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v51, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v52, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v53, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v80, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v82, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v65, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v67, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v68, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v69, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v29, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 8, v0 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX11-NEXT: 
s_waitcnt vmcnt(12) +; GFX11-NEXT: v_lshlrev_b32_e32 v54, 8, v4 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_lshlrev_b32_e32 v55, 8, v6 +; GFX11-NEXT: s_waitcnt vmcnt(10) +; GFX11-NEXT: v_lshlrev_b32_e32 v64, 8, v8 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 8, v10 +; GFX11-NEXT: s_waitcnt vmcnt(8) +; GFX11-NEXT: v_lshlrev_b32_e32 v19, 8, v12 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_lshlrev_b32_e32 v21, 8, v14 +; GFX11-NEXT: s_waitcnt vmcnt(6) +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 8, v86 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB51_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: s_lshl_b32 s6, s6, 16 +; GFX11-NEXT: s_lshl_b32 s7, s17, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_and_b32 s6, s16, 0xff +; GFX11-NEXT: s_and_b32 s8, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s19, 8 +; GFX11-NEXT: s_or_b32 s6, s6, s7 +; GFX11-NEXT: s_or_b32 s7, s8, s9 +; GFX11-NEXT: s_and_b32 s6, s6, 0xffff +; GFX11-NEXT: s_lshl_b32 s7, s7, 16 +; GFX11-NEXT: s_and_b32 s8, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s21, 8 +; GFX11-NEXT: s_or_b32 s6, s6, s7 +; GFX11-NEXT: s_or_b32 s7, s8, s9 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_or_b32 s8, s8, s9 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_and_b32 s8, s9, 0xffff +; GFX11-NEXT: s_and_b32 s9, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s27, 8 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v31 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v1, 
0xff, v32 +; GFX11-NEXT: s_lshl_b32 s9, s9, 16 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v38 +; GFX11-NEXT: s_or_b32 s8, s8, s9 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v83 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v84 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v24 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v85 +; GFX11-NEXT: s_and_b32 s11, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s12, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v82 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v68 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v69 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s10, s10, 0xffff +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_or_b32_e32 v4, s10, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v37 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v16 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v18 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v20 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v70 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v71 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v80 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v81 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v65 +; GFX11-NEXT: v_or_b32_e32 v7, v7, v66 +; GFX11-NEXT: v_or_b32_e32 v9, v9, v67 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v52 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v6 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v7 +; GFX11-NEXT: 
v_lshlrev_b32_e32 v9, 16, v9 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v64 +; GFX11-NEXT: v_or_b32_e32 v6, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v7, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v8, v8, v12 +; GFX11-NEXT: v_or_b32_e32 v9, v13, v9 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v28 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v53 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v11 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v51 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v50 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v49 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v48 +; GFX11-NEXT: v_and_b32_e32 v86, 0xff, v39 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v27 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v29 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v54 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v55 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v17 +; GFX11-NEXT: v_or_b32_e32 v12, v12, v19 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v21 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v23 +; GFX11-NEXT: v_or_b32_e32 v86, v86, v25 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v96, 0xffff, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v86, 16, v86 +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v87 +; GFX11-NEXT: v_or_b32_e32 v14, v96, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v86 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB51_3 +; GFX11-NEXT: .LBB51_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, 
s2, 3 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s0, s0, s1 +; GFX11-NEXT: s_and_b32 s1, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s2, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_or_b32 s1, s2, s1 +; GFX11-NEXT: s_and_b32 s2, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s19, 8 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_or_b32 s2, s3, s2 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_and_b32 s3, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s1, s1, s2 +; GFX11-NEXT: s_or_b32 s2, s4, s3 +; GFX11-NEXT: s_and_b32 s3, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s23, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s3, s4, s3 +; GFX11-NEXT: s_and_b32 s4, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s25, 8 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_addk_i32 s3, 0x300 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s2, s2, 0xffff +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s2, s2, s3 +; GFX11-NEXT: s_and_b32 s3, s4, 0xffff +; GFX11-NEXT: s_and_b32 s4, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s27, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v31 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v32 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v38 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: 
s_or_b32 s3, s3, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v33 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v24 +; GFX11-NEXT: v_or_b32_e32 v0, v83, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v84, v1 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_or_b32_e32 v2, v85, v2 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v6, v82, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v10, v68, v10 +; GFX11-NEXT: v_or_b32_e32 v11, v69, v11 +; GFX11-NEXT: s_and_b32 s6, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: s_or_b32 s5, s7, s6 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v35 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v37 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v20 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_and_b32_e32 
v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v52 +; GFX11-NEXT: v_or_b32_e32 v0, v70, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v71, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v80, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v81, v3 +; GFX11-NEXT: v_or_b32_e32 v6, v65, v6 +; GFX11-NEXT: v_or_b32_e32 v7, v66, v7 +; GFX11-NEXT: v_or_b32_e32 v9, v67, v9 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 +; GFX11-NEXT: v_or_b32_e32 v11, v64, v11 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v6 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_or_b32_e32 v6, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v7, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v8, v8, v12 +; GFX11-NEXT: v_or_b32_e32 v9, v13, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v26 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v28 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v51 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 3, v49 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v48 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v39 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: 
v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX11-NEXT: v_or_b32_e32 v0, v27, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v29, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v54, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v55, v3 +; GFX11-NEXT: v_or_b32_e32 v11, v17, v11 +; GFX11-NEXT: v_or_b32_e32 v12, v19, v12 +; GFX11-NEXT: v_or_b32_e32 v14, v21, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v23, v15 +; GFX11-NEXT: v_or_b32_e32 v16, v25, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v17 +; GFX11-NEXT: v_or_b32_e32 v14, v18, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v16 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB51_3: ; %end +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB51_4: +; GFX11-NEXT: ; implicit-def: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: s_branch .LBB51_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -41499,107 +40729,107 @@ define <8 x i64> @bitcast_v64i8_to_v8i64(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v65, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v81, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:80 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v51, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v99, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v100, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v101, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v102, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v103, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 
offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, v29.l +; GFX11-TRUE16-NEXT: scratch_load_u16 v70, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:4 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v29.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v27.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v22.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v20.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v18.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.h, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 
v51.h, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v19.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v19.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v21.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v30.h, 8, v23.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v27.l, 8, v25.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v27.h, 8, v27.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.l, 8, v80.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.l, 8, v48.h ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v64.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v80.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v64.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v81.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, 
v65.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v82.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v65.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v83.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v66.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v84.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v66.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v85.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v67.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v87.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v67.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v96.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, v68.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, v97.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.l, 8, v68.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.l, 8, v98.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v69.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v99.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v69.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v100.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v70.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v101.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.l, 8, v70.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.l, 8, v102.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.h, 8, v71.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.h, 8, v103.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v71.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v112.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v80.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v81 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v113.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v86 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB70_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -41610,95 +40840,95 @@ define <8 x i64> @bitcast_v64i8_to_v8i64(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB70_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v53.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.h, 0 -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v52.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v52.l -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v0.l, v54.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v55.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v64.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v54.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v64, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v1.h, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v37.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, 0 +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v36.l +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v0.l, v38.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v39.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v80.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v38.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v33.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v80, v0 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v1.h, v37.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v29.l ; GFX11-TRUE16-NEXT: v_and_b16 
v4.h, 0xff, v28.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v51.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v35.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v26.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v64, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v2.l, v51.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v48.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v80, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v2.l, v35.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v31.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v32.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v24.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v64, v2 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v3.l, v50.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v50.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v80, v2 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v3.l, v34.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v34.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v30.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v28.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v64, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v4.l, v39.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v80, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v4.l, v31.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, 
v5.l, v29.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v70.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v80, v4 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v5.l, v29.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v25.h ; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v23.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v64, v5 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v6.l, v27.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v69.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v68.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v80, v5 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v6.l, v27.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v67.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v22.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v64, v6 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v7.l, v25.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v66.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v65.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v80, v6 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v7.l, v25.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v21.h -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v64, v7 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v8.l, v23.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v64.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v55.l +; 
GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v54.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v80, v7 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v8.l, v23.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v20.h ; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v19.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v64, v8 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v9.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v52.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v80, v8 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v9.l, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v51.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v18.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v31.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v64, v9 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v10.l, v21.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v50.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v49.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v80, v9 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v10.l, v21.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v17.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v48.l +; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_hi16 @@ -41707,32 +40937,32 @@ define <8 x i64> @bitcast_v64i8_to_v8i64(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 ; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr27_lo16 @@ -41750,199 +40980,199 @@ define <8 x i64> @bitcast_v64i8_to_v8i64(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v64, v10 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v11.l, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v80, v10 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v11.l, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v16.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr20_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr16_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v12.l, v19.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v80, v11 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v12.l, v19.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v64, v12 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v13.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v80, v12 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v13.l, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v64, v13 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v14.l, v17.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v80, v13 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v14.l, v17.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v64, v14 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v15.l, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v80, v14 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v15.l, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v64, v15 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v80, v15 ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB70_2 ; GFX11-TRUE16-NEXT: .LBB70_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.h, 3 -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v52.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v52.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, 0 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v39.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v36.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v54.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v55.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v54.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.l, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v0.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v38.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v39.l, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v38.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v37.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v52, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v36, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v39.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v52.h -; GFX11-TRUE16-NEXT: 
v_or_b16 v3.l, v51.l, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v35.l, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v51.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v35.h, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v52, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v3.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v52.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v36, v4 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v3.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v36.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v3.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v50.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v50.h, v2.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v34.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v34.h, v2.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v29.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v28.h, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v52, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v3.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v36, v5 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 0x300, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v4.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v26.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v24.h, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v39.h, v3.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v48.l, v3.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v31.h, v3.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v32.l, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v4.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v52, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v5.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v36, v6 +; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v29.h, v4.l ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v30.h, v4.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v24.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v26.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v52, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v5.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v36, v7 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v6.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v28.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v30.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v27.l, v5.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v27.h, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v6.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v52, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v7.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v36, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v25.l, v6.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v25.h, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v38.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v52, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v7.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v71.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v70.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v36, v9 +; GFX11-TRUE16-NEXT: v_add_nc_u16 
v36.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v37.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v37.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v69.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v68.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v23.l, v7.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v23.h, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v52, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v9.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v36, v10 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v22.l, v8.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v22.h, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v36.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v52, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v9.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v66.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v36, v11 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v35.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v64.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 
v11.l, v21.l, v9.l ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v21.h, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v52, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v11.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v36, v12 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v11.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v20.l, v10.l ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v20.h, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v34.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v52, v13 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v11.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v55.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v54.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v36, v13 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v33.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v53.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v52.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v19.l, v11.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v19.h, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v52, v14 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v13.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v36, v14 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v13.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, 
v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v18.l, v12.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v18.h, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v52, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v13.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v51.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v50.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v36, v15 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v31.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v31.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v17.l, v13.l ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v17.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v52, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v15.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v36, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v15.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v16.l, v14.l ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v16.h, v14.h ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v52, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v15.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v36, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v15.l ; 
GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v15.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v52, v15 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v36, v15 ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -43722,775 +42952,390 @@ define inreg <8 x i64> @bitcast_v64i8_to_v8i64_scalar(<64 x i8> inreg %a, i32 in ; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX9-NEXT: s_branch .LBB71_2 ; -; GFX11-TRUE16-LABEL: bitcast_v64i8_to_v8i64_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:36 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_b16 v50, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v70, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v80, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v82, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v67, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v68, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v69, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v54, 8, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 8, v6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 8, v8 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 8, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 8, v14 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 8, v86 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB71_4 -; 
GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s17, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s16, 0xff -; GFX11-TRUE16-NEXT: s_and_b32 s8, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s6, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s7, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s21, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s9, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s27, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v31 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v32 -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s9, 16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v38 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v83 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v22 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v11, 0xff, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v85 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v6, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v68 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v69 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s10, 0xffff -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s10, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v71 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v80 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v81 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v6, v65 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v7, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v9, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v28 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v51 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v50 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v49 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v86, 0xff, v39 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v55 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v12, v19 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v21 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v86, v86, v25 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v96, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v86, 16, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v87 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v96, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v86 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB71_3 -; GFX11-TRUE16-NEXT: .LBB71_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s2, 
s2, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v31 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v32 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v38 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v33 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 3, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v83, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v84, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v85, v2 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v82, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v68, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v69, v11 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s7, s6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-TRUE16-NEXT: 
v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v52 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v70, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v71, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v80, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v65, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v66, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v67, v9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, 
v64, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v26 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v28 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v51 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 3, v49 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 3, v48 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 3, v39 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v27, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v29, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v54, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v55, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v17, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v19, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v21, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v23, v15 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v25, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v18, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB71_3: ; %end -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB71_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_branch .LBB71_2 -; -; GFX11-FAKE16-LABEL: bitcast_v64i8_to_v8i64_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 
v35, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v39, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v86, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v48, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v49, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v50, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v51, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v52, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v53, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v80, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v81, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v65, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v68, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v69, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; 
GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 8, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 8, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v55, 8, v6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v64, 8, v8 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 8, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 8, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 8, v14 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 8, v86 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB71_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s17, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s16, 0xff -; GFX11-FAKE16-NEXT: s_and_b32 s8, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s6, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s7, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s21, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; 
GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s9, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s27, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v31 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v32 -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s9, 16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v38 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v83 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v85 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v6, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v68 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v69 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s10, 0xffff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s10, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v20 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v71 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v80 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v81 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v6, v65 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v7, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v9, v67 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v28 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v51 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v49 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v86, 0xff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v55 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v12, 
v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v86, v86, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v96, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v87 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v96, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v86 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB71_3 -; GFX11-FAKE16-NEXT: .LBB71_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-FAKE16-NEXT: 
s_lshl_b32 s3, s19, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v31 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v32 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v38 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 3, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v83, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v84, v1 -; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v85, v2 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v82, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v68, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v69, v11 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s7, s6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 
0xff, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v52 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v70, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v71, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v80, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v65, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v66, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v67, v9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v51 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 3, v49 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 3, v48 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 3, v39 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v27, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v29, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v54, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v55, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v17, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v19, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v21, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v23, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v25, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; 
GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v18, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB71_3: ; %end -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB71_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: s_branch .LBB71_2 +; GFX11-LABEL: bitcast_v64i8_to_v8i64_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 +; GFX11-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 +; GFX11-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 +; GFX11-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v0, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v39, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v86, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v48, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v49, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v50, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v51, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v52, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v53, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v1 +; 
GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v80, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v82, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v65, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v67, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v68, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v69, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v29, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 8, v0 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX11-NEXT: s_waitcnt vmcnt(12) +; GFX11-NEXT: v_lshlrev_b32_e32 v54, 8, v4 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_lshlrev_b32_e32 v55, 8, v6 +; GFX11-NEXT: s_waitcnt vmcnt(10) +; GFX11-NEXT: v_lshlrev_b32_e32 v64, 8, v8 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 8, v10 +; GFX11-NEXT: s_waitcnt vmcnt(8) +; GFX11-NEXT: v_lshlrev_b32_e32 v19, 8, v12 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_lshlrev_b32_e32 v21, 8, v14 +; GFX11-NEXT: s_waitcnt vmcnt(6) +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 8, v86 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB71_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: s_lshl_b32 s6, s6, 16 +; GFX11-NEXT: s_lshl_b32 s7, s17, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_and_b32 s6, s16, 0xff +; GFX11-NEXT: s_and_b32 s8, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s19, 8 +; GFX11-NEXT: s_or_b32 s6, 
s6, s7 +; GFX11-NEXT: s_or_b32 s7, s8, s9 +; GFX11-NEXT: s_and_b32 s6, s6, 0xffff +; GFX11-NEXT: s_lshl_b32 s7, s7, 16 +; GFX11-NEXT: s_and_b32 s8, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s21, 8 +; GFX11-NEXT: s_or_b32 s6, s6, s7 +; GFX11-NEXT: s_or_b32 s7, s8, s9 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_or_b32 s8, s8, s9 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_and_b32 s8, s9, 0xffff +; GFX11-NEXT: s_and_b32 s9, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s27, 8 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v31 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v32 +; GFX11-NEXT: s_lshl_b32 s9, s9, 16 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v38 +; GFX11-NEXT: s_or_b32 s8, s8, s9 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v83 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v84 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v24 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v85 +; GFX11-NEXT: s_and_b32 s11, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s12, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v82 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v68 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v69 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s10, s10, 0xffff +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_or_b32_e32 v4, s10, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v37 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v16 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, 
v18 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v20 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v70 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v71 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v80 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v81 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v65 +; GFX11-NEXT: v_or_b32_e32 v7, v7, v66 +; GFX11-NEXT: v_or_b32_e32 v9, v9, v67 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v52 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v6 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v64 +; GFX11-NEXT: v_or_b32_e32 v6, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v7, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v8, v8, v12 +; GFX11-NEXT: v_or_b32_e32 v9, v13, v9 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v28 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v53 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v11 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v51 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v50 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v49 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v48 +; GFX11-NEXT: v_and_b32_e32 v86, 0xff, v39 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v27 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v29 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v54 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v55 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v17 +; GFX11-NEXT: v_or_b32_e32 v12, v12, v19 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v21 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v23 +; GFX11-NEXT: v_or_b32_e32 v86, v86, v25 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: 
v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v96, 0xffff, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v86, 16, v86 +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v87 +; GFX11-NEXT: v_or_b32_e32 v14, v96, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v86 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB71_3 +; GFX11-NEXT: .LBB71_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s0, s0, s1 +; GFX11-NEXT: s_and_b32 s1, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s2, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_or_b32 s1, s2, s1 +; GFX11-NEXT: s_and_b32 s2, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s19, 8 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_or_b32 s2, s3, s2 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_and_b32 s3, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s1, s1, s2 +; GFX11-NEXT: s_or_b32 s2, s4, s3 +; GFX11-NEXT: s_and_b32 s3, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s23, 8 +; GFX11-NEXT: s_add_i32 
s24, s24, 3 +; GFX11-NEXT: s_or_b32 s3, s4, s3 +; GFX11-NEXT: s_and_b32 s4, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s25, 8 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_addk_i32 s3, 0x300 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s2, s2, 0xffff +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s2, s2, s3 +; GFX11-NEXT: s_and_b32 s3, s4, 0xffff +; GFX11-NEXT: s_and_b32 s4, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s27, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v31 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v32 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v38 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v33 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v24 +; GFX11-NEXT: v_or_b32_e32 v0, v83, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v84, v1 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_or_b32_e32 v2, v85, v2 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v6, v82, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v10, v68, v10 +; GFX11-NEXT: v_or_b32_e32 v11, v69, v11 +; GFX11-NEXT: s_and_b32 s6, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: s_or_b32 s5, s7, s6 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; 
GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v35 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v37 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v20 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v52 +; GFX11-NEXT: v_or_b32_e32 v0, v70, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v71, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v80, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v81, v3 +; GFX11-NEXT: v_or_b32_e32 v6, v65, v6 +; GFX11-NEXT: v_or_b32_e32 v7, v66, v7 +; GFX11-NEXT: v_or_b32_e32 v9, v67, v9 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 +; GFX11-NEXT: v_or_b32_e32 v11, v64, v11 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v6 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 
+; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_or_b32_e32 v6, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v7, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v8, v8, v12 +; GFX11-NEXT: v_or_b32_e32 v9, v13, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v26 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v28 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v51 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 3, v49 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v48 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v39 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX11-NEXT: v_or_b32_e32 v0, v27, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v29, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v54, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v55, v3 +; GFX11-NEXT: v_or_b32_e32 v11, v17, v11 +; GFX11-NEXT: v_or_b32_e32 v12, v19, v12 +; GFX11-NEXT: v_or_b32_e32 v14, v21, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v23, v15 +; GFX11-NEXT: v_or_b32_e32 v16, v25, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; 
GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v17 +; GFX11-NEXT: v_or_b32_e32 v14, v18, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v16 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB71_3: ; %end +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB71_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: s_branch .LBB71_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -54802,107 +53647,107 @@ define <8 x double> @bitcast_v64i8_to_v8f64(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v65, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v66, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v66, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v81, off, s32 
offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v67, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v68, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v69, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v70, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v70, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v71, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v71, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v51, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 
offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v99, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v100, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v101, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v102, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v103, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v112, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v113, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, v29.l +; GFX11-TRUE16-NEXT: scratch_load_u16 v70, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:4 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(32) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v29.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v27.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v22.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v20.l ; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v28.h, v18.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v14.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v10.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.l, v4.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v53.h, v2.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v55.h, v0.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v1.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v3.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v7.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v9.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v11.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v13.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v17.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v19.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v14.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, v12.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v8.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v6.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v2.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v1.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v3.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v5.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v7.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v11.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v13.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v15.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v19.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v21.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v30.h, 8, v23.l ; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v27.l, 8, v25.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v27.h, 8, v27.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.l, 8, v80.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.l, 8, v48.h ; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v64.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.h, 8, v80.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v64.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v16.l, 8, v81.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v65.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.h, 8, v82.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v65.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v17.l, 8, v83.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v66.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.h, 8, v84.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v66.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v18.l, 8, v85.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v67.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v87.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v67.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.l, 8, v96.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, v68.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v23.h, 8, v97.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.l, 8, v68.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.l, 8, v98.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v69.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v22.h, 8, v99.l ; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v69.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.l, 8, v100.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v70.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v21.h, 8, v101.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.l, 8, v70.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.l, 8, v102.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.h, 8, v71.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v20.h, 8, v103.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v71.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.l, 8, v112.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v80.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v81 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v19.h, 8, v113.l +; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v86 ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB86_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -54913,95 +53758,95 @@ define <8 x double> @bitcast_v64i8_to_v8f64(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB86_3: ; %cmp.false -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v55.h -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v53.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.h, 0 -; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v52.h -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v52.l -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v0.l, v54.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v55.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v64.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v54.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v49.h -; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v49.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v64, v0 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v1.h, 
v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v39.h +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v37.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, 0 +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v36.l +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v0.l, v38.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v39.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v80.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v1.l, v38.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v33.h +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v33.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v80, v0 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v1.h, v37.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v29.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v28.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v51.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.h, v2.h, v35.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v26.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v64, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v2.l, v51.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v48.h -; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v39.l -; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v48.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v80, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v2.l, v35.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v31.l +; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v32.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v24.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v64, v2 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v3.l, v50.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v50.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v80, v2 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v3.l, v34.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v3.h, v34.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, 
v5.h, v30.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v24.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v26.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v28.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v64, v3 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v4.l, v39.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v80, v3 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v4.l, v31.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v27.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v64, v4 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v5.l, v29.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v70.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v80, v4 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v5.l, v29.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v25.h ; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v23.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v64, v5 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v6.l, v27.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v36.h +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v69.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v68.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v80, v5 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v6.l, v27.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v67.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v22.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v64, v6 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, 
v7.l, v25.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v66.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v65.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v80, v6 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v7.l, v25.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v21.h -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v64, v7 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v8.l, v23.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v64.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v55.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v54.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v80, v7 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v8.l, v23.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v20.h ; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v19.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v64, v8 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v9.l, v22.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v64.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v32.h +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v52.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v80, v8 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v9.l, v22.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v80.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v51.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v18.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v31.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v64, v9 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v10.l, v21.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v64.h +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v50.l 
+; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v49.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v80, v9 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v10.l, v21.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v17.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v31.l -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v48.l +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_hi16 @@ -55010,32 +53855,32 @@ define <8 x double> @bitcast_v64i8_to_v8f64(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr65 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 ; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr27_lo16 @@ -55053,199 +53898,199 @@ define <8 x double> @bitcast_v64i8_to_v8f64(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v64, v10 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v11.l, v20.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v80, v10 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v11.l, v20.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v80.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v16.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr20_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_hi16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v12.l, v19.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v80, v11 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v12.l, v19.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v64, v12 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v13.l, v18.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v80, v12 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v13.l, v18.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v64, v13 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v14.l, v17.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v80, v13 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v14.l, v17.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_lo16 -; 
GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v64, v14 -; GFX11-TRUE16-NEXT: v_or_b16 v64.l, v15.l, v16.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v64.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v80, v14 +; GFX11-TRUE16-NEXT: v_or_b16 v80.l, v15.l, v16.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v80.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_lo16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v64, v15 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v80, v15 ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB86_2 ; GFX11-TRUE16-NEXT: .LBB86_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v55.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v52.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v52.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.h, 0 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v39.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v36.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v36.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, 0 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v54.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v55.l, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v54.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.l, v1.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v49.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v0.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v38.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v39.l, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v38.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v37.l, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 
v36.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.h, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v33.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v48.h, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v52, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v1.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v32.h, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v36, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v39.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v51.l, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v35.l, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v51.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v35.h, v1.h ; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v2.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v52, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v3.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v52.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v36, v4 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v3.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v36.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v5.h, 0x300, v3.h -; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v50.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v50.h, v2.h +; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v34.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v34.h, v2.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v29.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v28.h, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v52, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v3.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v36, v5 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v3.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, 
0x300, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v4.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.l, v26.h, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v4.h, v24.h, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v39.h, v3.l -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v48.l, v3.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v31.h, v3.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v32.l, v3.h ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v4.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v52, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v5.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v36, v6 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v7.h, 0x300, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v29.h, v4.l ; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v30.h, v4.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v24.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v26.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v52, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v5.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v36, v7 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v5.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, 0x300, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v6.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.l, v28.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v6.h, v30.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v27.l, v5.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v27.h, v5.h ; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v6.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v52, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v7.l +; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v5, v36, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.h, 0x300, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v25.l, v6.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v25.h, v6.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v38.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v52, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v7.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v71.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v70.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v36, v9 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v7.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v37.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v37.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.l, v69.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v8.h, v68.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v23.l, v7.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v23.h, v7.h ; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l ; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v8.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v52, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v9.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v36, v10 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v22.l, v8.l ; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v22.h, v8.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v36.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 
v52, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v9.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v67.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v66.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v36, v11 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v9.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v35.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, v64.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v21.l, v9.l ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v21.h, v9.h ; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v10.l ; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v10.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v52, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v11.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v36, v12 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v11.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v20.l, v10.l ; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v20.h, v10.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v34.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v52, v13 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v11.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v55.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v54.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v36, v13 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v11.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v33.h, 3 -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v33.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, v53.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, v52.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v19.l, v11.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v19.h, v11.h ; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v12.l ; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v12.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v52, v14 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v13.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v36, v14 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v13.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v18.l, v12.l ; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v18.h, v12.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v52, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v13.l +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v51.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v50.l, 3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v36, v15 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v13.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v18.h, 0x300, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v31.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v31.l, 3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v52.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v17.l, v13.l ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v17.h, v13.h ; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v14.l ; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v14.h -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v13, v52, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v15.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v36, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v17.h, 0x300, v15.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v52.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v36.h ; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v16.l, v14.l ; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v16.h, v14.h ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v52, v17 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v52.l, 0x300, v15.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v36, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v36.l, 0x300, v15.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v15.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v52.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v52, v15 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.l, v36.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v36, v15 ; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -57025,775 +55870,390 @@ define inreg <8 x double> @bitcast_v64i8_to_v8f64_scalar(<64 x i8> inreg %a, i32 ; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX9-NEXT: s_branch .LBB87_2 ; -; GFX11-TRUE16-LABEL: bitcast_v64i8_to_v8f64_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:56 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v39, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v86, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v49, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v70, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v80, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v82, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v67, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v68, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v69, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v54, 8, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 8, v6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 8, v8 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 8, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 8, v14 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 8, v86 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB87_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s17, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s16, 0xff -; GFX11-TRUE16-NEXT: s_and_b32 s8, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s6, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s7, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s21, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_and_b32 s7, s7, 0xffff -; 
GFX11-TRUE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s9, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s27, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v31 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v32 -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s9, 16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v38 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v83 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v84 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v85 -; GFX11-TRUE16-NEXT: s_and_b32 s11, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v6, v82 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v68 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v69 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s10, 0xffff -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s10, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v18 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v1, v1, v71 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v80 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v81 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v6, v65 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v7, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v9, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v28 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v51 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v50 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v49 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v86, 0xff, v39 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v27 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v55 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v12, v19 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v21 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v23 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v86, v86, v25 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v87, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v96, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v86, 16, v86 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v87 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v96, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v15, v86 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB87_3 -; GFX11-TRUE16-NEXT: .LBB87_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: 
s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-TRUE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v31 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v32 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v38 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v33 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 3, v22 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v83, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v84, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v85, v2 -; 
GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v82, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v68, v10 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v69, v11 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s29, 8 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s7, s6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v35 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, s5, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v34 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v37 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v52 -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v0, v70, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v71, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v80, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v65, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v66, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v67, v9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v26 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v28 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 3, v51 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 3, v50 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 3, v49 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 3, v48 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 3, v39 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v27, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v29, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v54, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v55, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v17, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v19, v12 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v21, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v23, v15 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v25, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v17 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v18, v14 -; GFX11-TRUE16-NEXT: v_or_b32_e32 
v15, v15, v16 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB87_3: ; %end -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB87_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_branch .LBB87_2 -; -; GFX11-FAKE16-LABEL: bitcast_v64i8_to_v8f64_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v39, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v86, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v48, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v49, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v50, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v51, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v52, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v53, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v1 -; 
GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v80, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v81, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v65, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v68, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v69, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 8, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 8, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v55, 8, v6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v64, 8, v8 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 8, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 8, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 8, v14 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 8, v86 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB87_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 
-; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s17, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s16, 0xff -; GFX11-FAKE16-NEXT: s_and_b32 s8, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s6, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s7, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s21, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s6, s7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_and_b32 s7, s7, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s9, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s27, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v31 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v32 -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s9, 16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v38 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s8, s9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v83 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v84 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v85 -; GFX11-FAKE16-NEXT: s_and_b32 s11, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: 
v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v6, v82 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v68 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v69 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s10, 0xffff -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s10, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v18 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v71 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v80 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v81 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v6, v65 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v7, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v9, v67 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v26 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v28 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v51 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v49 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v86, 0xff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v55 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v12, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v23 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v86, v86, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v96, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v86 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v87 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v96, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v86 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB87_3 -; GFX11-FAKE16-NEXT: .LBB87_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 
s2, s2, 3 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s2, s1 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s19, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s1, s2 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s4, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s25, 8 -; GFX11-FAKE16-NEXT: s_addk_i32 s2, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s3, 0x300 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xffff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s2, s2, s3 -; GFX11-FAKE16-NEXT: s_and_b32 s3, s4, 0xffff -; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xff -; 
GFX11-FAKE16-NEXT: s_lshl_b32 s5, s27, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v31 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v32 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v38 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s4, 16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 3, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v83, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v84, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v85, v2 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v82, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v68, v10 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v69, v11 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s29, 8 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s7, s6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v1, v2 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s5, 0xffff -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, s5, v0 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v52 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v70, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v71, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v80, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v65, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v66, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v67, v9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v64, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v8, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, v13, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 3, v51 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 3, v50 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 3, v49 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 3, v48 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 3, v39 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xff, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v27, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v29, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v54, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v55, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v17, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v19, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v21, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v23, v15 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v25, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v0, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, v2, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v17 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v18, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB87_3: ; %end -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB87_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: s_branch .LBB87_2 +; GFX11-LABEL: bitcast_v64i8_to_v8f64_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v37, v12 +; GFX11-NEXT: v_dual_mov_b32 v36, v10 :: v_dual_mov_b32 v35, v8 +; GFX11-NEXT: v_dual_mov_b32 v34, v6 :: v_dual_mov_b32 v33, v4 +; GFX11-NEXT: v_dual_mov_b32 v32, v2 :: v_dual_mov_b32 v31, v0 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v0, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v39, off, s32 offset:52 +; GFX11-NEXT: 
scratch_load_b32 v2, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v86, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v48, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v49, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v50, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v51, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v52, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v53, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v80, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v82, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v65, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v67, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v68, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v69, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v29, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 8, v0 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX11-NEXT: s_waitcnt vmcnt(12) +; GFX11-NEXT: v_lshlrev_b32_e32 v54, 8, v4 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_lshlrev_b32_e32 v55, 8, v6 +; GFX11-NEXT: s_waitcnt vmcnt(10) +; GFX11-NEXT: v_lshlrev_b32_e32 v64, 8, v8 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 8, v10 +; GFX11-NEXT: s_waitcnt vmcnt(8) +; GFX11-NEXT: v_lshlrev_b32_e32 
v19, 8, v12 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_lshlrev_b32_e32 v21, 8, v14 +; GFX11-NEXT: s_waitcnt vmcnt(6) +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 8, v86 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB87_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: s_lshl_b32 s6, s6, 16 +; GFX11-NEXT: s_lshl_b32 s7, s17, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_and_b32 s6, s16, 0xff +; GFX11-NEXT: s_and_b32 s8, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s19, 8 +; GFX11-NEXT: s_or_b32 s6, s6, s7 +; GFX11-NEXT: s_or_b32 s7, s8, s9 +; GFX11-NEXT: s_and_b32 s6, s6, 0xffff +; GFX11-NEXT: s_lshl_b32 s7, s7, 16 +; GFX11-NEXT: s_and_b32 s8, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s21, 8 +; GFX11-NEXT: s_or_b32 s6, s6, s7 +; GFX11-NEXT: s_or_b32 s7, s8, s9 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: s_or_b32 s8, s8, s9 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_and_b32 s7, s7, 0xffff +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_and_b32 s8, s9, 0xffff +; GFX11-NEXT: s_and_b32 s9, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s27, 8 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v31 +; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v32 +; GFX11-NEXT: s_lshl_b32 s9, s9, 16 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v38 +; GFX11-NEXT: s_or_b32 s8, s8, s9 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v83 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v84 +; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v22 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v24 +; GFX11-NEXT: v_or_b32_e32 v2, 
v2, v85 +; GFX11-NEXT: s_and_b32 s11, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s12, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v82 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v68 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v69 +; GFX11-NEXT: s_or_b32 s10, s11, s12 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s10, s10, 0xffff +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v35 +; GFX11-NEXT: v_or_b32_e32 v4, s10, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v36 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v37 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v16 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v18 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v20 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v70 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v71 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v80 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v81 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v65 +; GFX11-NEXT: v_or_b32_e32 v7, v7, v66 +; GFX11-NEXT: v_or_b32_e32 v9, v9, v67 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v52 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v6 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v64 +; GFX11-NEXT: v_or_b32_e32 v6, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v7, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v8, v8, v12 +; GFX11-NEXT: v_or_b32_e32 v9, v13, v9 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v28 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v30 +; GFX11-NEXT: 
s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v53 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v11 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v51 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v50 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v49 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v48 +; GFX11-NEXT: v_and_b32_e32 v86, 0xff, v39 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v27 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v29 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v54 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v55 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v17 +; GFX11-NEXT: v_or_b32_e32 v12, v12, v19 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v21 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v23 +; GFX11-NEXT: v_or_b32_e32 v86, v86, v25 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v87, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v96, 0xffff, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v86, 16, v86 +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v87 +; GFX11-NEXT: v_or_b32_e32 v14, v96, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v86 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB87_3 +; GFX11-NEXT: .LBB87_2: ; %cmp.true +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_and_b32 s0, s0, 0xffff +; GFX11-NEXT: s_lshl_b32 s1, s1, 16 
+; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s0, s0, s1 +; GFX11-NEXT: s_and_b32 s1, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s2, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_or_b32 s1, s2, s1 +; GFX11-NEXT: s_and_b32 s2, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s19, 8 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_or_b32 s2, s3, s2 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_and_b32 s3, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: s_or_b32 s1, s1, s2 +; GFX11-NEXT: s_or_b32 s2, s4, s3 +; GFX11-NEXT: s_and_b32 s3, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s4, s23, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: s_or_b32 s3, s4, s3 +; GFX11-NEXT: s_and_b32 s4, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s25, 8 +; GFX11-NEXT: s_addk_i32 s2, 0x300 +; GFX11-NEXT: s_addk_i32 s3, 0x300 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: s_and_b32 s2, s2, 0xffff +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: s_or_b32 s2, s2, s3 +; GFX11-NEXT: s_and_b32 s3, s4, 0xffff +; GFX11-NEXT: s_and_b32 s4, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s27, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v31 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v32 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v38 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v33 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 3, v22 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v24 +; GFX11-NEXT: v_or_b32_e32 v0, v83, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v84, v1 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: 
v_and_b32_e32 v10, 0xff, v10 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_or_b32_e32 v2, v85, v2 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_or_b32_e32 v6, v82, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_or_b32_e32 v10, v68, v10 +; GFX11-NEXT: v_or_b32_e32 v11, v69, v11 +; GFX11-NEXT: s_and_b32 s6, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s29, 8 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-NEXT: s_or_b32 s5, s7, s6 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v10 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: v_or_b32_e32 v5, v1, v2 +; GFX11-NEXT: s_and_b32 s5, s5, 0xffff +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v35 +; GFX11-NEXT: v_or_b32_e32 v4, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v34 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v37 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v20 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v52 +; GFX11-NEXT: v_or_b32_e32 v0, v70, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v71, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v80, v2 +; GFX11-NEXT: v_or_b32_e32 v3, v81, v3 +; GFX11-NEXT: v_or_b32_e32 v6, v65, 
v6 +; GFX11-NEXT: v_or_b32_e32 v7, v66, v7 +; GFX11-NEXT: v_or_b32_e32 v9, v67, v9 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 0x300, v9 +; GFX11-NEXT: v_or_b32_e32 v11, v64, v11 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v6 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_or_b32_e32 v6, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v7, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v8, v8, v12 +; GFX11-NEXT: v_or_b32_e32 v9, v13, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v26 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v28 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v53 +; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 3, v51 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 3, v50 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 3, v49 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v48 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v39 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX11-NEXT: v_or_b32_e32 v0, v27, v0 +; GFX11-NEXT: v_or_b32_e32 v1, v29, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v54, v2 +; 
GFX11-NEXT: v_or_b32_e32 v3, v55, v3 +; GFX11-NEXT: v_or_b32_e32 v11, v17, v11 +; GFX11-NEXT: v_or_b32_e32 v12, v19, v12 +; GFX11-NEXT: v_or_b32_e32 v14, v21, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v23, v15 +; GFX11-NEXT: v_or_b32_e32 v16, v25, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v11 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v12 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v14 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v15 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v16 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v11 +; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v12 +; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; GFX11-NEXT: v_or_b32_e32 v11, v0, v1 +; GFX11-NEXT: v_or_b32_e32 v12, v2, v3 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v17 +; GFX11-NEXT: v_or_b32_e32 v14, v18, v14 +; GFX11-NEXT: v_or_b32_e32 v15, v15, v16 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB87_3: ; %end +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB87_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: s_branch .LBB87_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -68463,45 +66923,45 @@ define <32 x i16> @bitcast_v64i8_to_v32i16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; 
GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v55, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v55, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v53, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v52, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v54, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v66, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v54, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:44 -; GFX11-TRUE16-NEXT: 
scratch_load_d16_hi_b16 v37, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v51, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v86, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v70, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 
offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v29.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v27.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v25.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v23.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v23.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v19.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v17.l @@ -68527,47 +66987,42 @@ define <32 x i16> @bitcast_v64i8_to_v32i16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v25.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v26.h, 8, v26.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v30.h, 8, v30.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.l, 8, v31.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v28.h, 8, v28.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.l, 8, v29.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v29.h -; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v29.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v55.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v55.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v53.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v80.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, 
v39.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v53.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v52.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v54.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v39.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.h, 8, v48.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.h, 8, v81.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v37.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v48.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.h, 8, v82.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v50.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v83.l +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v34.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v35.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v36.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v38.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, v50.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v84.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v51.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v85.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v52.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.l, 8, v86.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v54.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v87.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v64.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v96.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v64.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, 
v97.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v65.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v66 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v98.l +; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB98_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -68595,22 +67050,22 @@ define <32 x i16> @bitcast_v64i8_to_v32i16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v31.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v31.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v68.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v55.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v65.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v69.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v66.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v67.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v64.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v54.l +; 
GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v70.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v50.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v52.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v49.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v51.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v48.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v23.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v23.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v20.l @@ -68622,27 +67077,27 @@ define <32 x i16> @bitcast_v64i8_to_v32i16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v25.h ; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v26.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v30.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v39.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v31.l ; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v28.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v29.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v49.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v33.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v29.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v49.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v39.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v48.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v48.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v51.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v52.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v50.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v50.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v54.l -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v51.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v54.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v53.l -; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v53.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v55.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v55.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v33.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v32.l +; 
GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v32.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v35.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v36.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v34.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v34.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v38.l +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v35.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v38.h +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v37.l +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v37.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v39.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v39.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16 @@ -68659,22 +67114,22 @@ define <32 x i16> @bitcast_v64i8_to_v32i16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51 +; GFX11-TRUE16-NEXT: ; implicit-def: 
$vgpr49 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr23_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr23_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr20_lo16 @@ -68686,46 +67141,46 @@ define <32 x i16> @bitcast_v64i8_to_v32i16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr25_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB98_2 ; GFX11-TRUE16-NEXT: .LBB98_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v33.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v31.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v51.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v52.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v50.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.l, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.l, v71.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v55.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 
v0.h, v55.h, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v53.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v39.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v39.h, v0.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v70.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v37.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v37.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v2.h @@ -68733,46 +67188,46 @@ define <32 x i16> @bitcast_v64i8_to_v32i16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v52.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v54.l, v0.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v64.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v36.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v38.l, v0.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v37.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v69.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v54.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v67.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v54.h, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v50.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v38.h, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v34.h, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h -; 
GFX11-TRUE16-NEXT: v_or_b16 v1.h, v51.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v35.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v36.l, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v51.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v50.l, v0.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v66.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v35.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v34.l, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v35.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v68.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.l, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v34.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v55.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v52.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v49.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v48.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v48.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v39.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v36.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v33.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v32.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v32.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v31.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v28.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.l, 0x300, v0.l @@ -68788,7 
+67243,7 @@ define <32 x i16> @bitcast_v64i8_to_v32i16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v49.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v33.l, v2.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v29.h, v0.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v28.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v29.l, v1.l @@ -68808,7 +67263,7 @@ define <32 x i16> @bitcast_v64i8_to_v32i16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v39.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v31.l, v2.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v25.h, v0.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v26.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v27.h, v1.l @@ -70545,695 +69000,350 @@ define inreg <32 x i16> @bitcast_v64i8_to_v32i16_scalar(<64 x i8> inreg %a, i32 ; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX9-NEXT: s_branch .LBB99_2 ; -; GFX11-TRUE16-LABEL: bitcast_v64i8_to_v32i16_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v31, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v33, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v10 :: v_dual_mov_b32 v35, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v4 :: v_dual_mov_b32 v37, v2 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:8 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v39, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v49, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v48, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v51, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v50, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v53, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v52, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v54, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: 
v_lshlrev_b32_e32 v70, 8, v8 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v14 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v84 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB99_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v35 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s11, s29, 8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v39 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s10, s11 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-TRUE16-NEXT: v_and_b32_e64 v1, 0xffff, s10 -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v38 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v31 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s8, s9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v37 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v36 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v2, v49 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v51 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v5, v50 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v6, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v24 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v68 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v16 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v7, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v18 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v8, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v3, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v55 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v19 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v8, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v2, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v9, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 
v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v3, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v80 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v82 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v27 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v1, 16, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v65 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v12, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v69 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v86, 0xff, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v84 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v25 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v87, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v96, v12, v81 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v97, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v86, v86, v85 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v98, 0xffff, v14 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v0, 16, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v1, 16, v3 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v15, 16, v87 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v96, 16, v97 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v86, 16, v98 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB99_3 -; GFX11-TRUE16-NEXT: .LBB99_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v68 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v67 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, 
v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v65 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v16 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v70, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v71, v5 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v66, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v28 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v29, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v27, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v25, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v23, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v21, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v55, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v19, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v54, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v17, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v53, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v33 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v31 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v37 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-TRUE16-NEXT: 
s_lshl_b32 s11, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v52, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v38 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v51, v4 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v50, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v49, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v35 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v82 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v80 -; GFX11-TRUE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v64 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v84, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v83, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v5 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v48, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v39, v4 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v85, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v9, 16, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v23, 16, v7 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v22, 16, v8 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v19, 16, v17 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v10, 16, v20 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v18, 16, v15 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v14, 16, v12 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v13, 16, v16 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v3, 16, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 
v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB99_3: ; %end -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB99_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_branch .LBB99_2 -; -; GFX11-FAKE16-LABEL: bitcast_v64i8_to_v32i16_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v31, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v33, v6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v10 :: v_dual_mov_b32 v35, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v4 :: v_dual_mov_b32 v37, v2 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v65, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v84, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v82, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v69, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v80, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v67, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v68, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v64, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v49, 8, v3 -; GFX11-FAKE16-NEXT: 
v_lshlrev_b32_e32 v48, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v51, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v53, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v52, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v55, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 8, v6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v8 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v81, 8, v14 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v84 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB99_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: 
s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v35 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s11, s29, 8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v39 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s10, s11 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-FAKE16-NEXT: v_and_b32_e64 v1, 0xffff, s10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v38 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v31 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s8, s8, s9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v49 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v51 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v5, v50 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v6, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 
0xff, v24 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v68 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v7, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v18 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v8, 16, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v3, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v55 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v67 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v8, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v2, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v9, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v3, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v80 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v82 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v1, 16, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v12, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v69 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v86, 0xff, v65 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v84 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v25 -; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v87, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v96, v12, v81 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v97, 0xffff, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v86, v86, v85 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v98, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v0, 16, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v1, 16, v3 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v15, 16, v87 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v96, 16, v97 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v86, 16, v98 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB99_3 -; GFX11-FAKE16-NEXT: .LBB99_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v68 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v67 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v64 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v16 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v70, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v71, v5 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v66, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 
v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v28 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v29, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v27, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v25, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v23, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v21, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v55, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v19, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v54, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v17, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v53, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v33 -; 
GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v52, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v51, v4 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s10, 0x300 -; 
GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v50, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v49, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v35 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v82 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v80 -; GFX11-FAKE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v84, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v83, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v48, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v39, v4 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v85, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; 
GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v9, 16, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v23, 16, v7 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v22, 16, v8 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v19, 16, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v10, 16, v20 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v18, 16, v15 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v14, 16, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v13, 16, v16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v3, 16, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB99_3: ; %end -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB99_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: s_branch .LBB99_2 +; GFX11-LABEL: bitcast_v64i8_to_v32i16_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v31, v8 +; GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v33, v6 +; GFX11-NEXT: v_dual_mov_b32 v32, v10 :: v_dual_mov_b32 v35, v0 +; GFX11-NEXT: v_dual_mov_b32 v38, v4 :: v_dual_mov_b32 v37, v2 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: 
scratch_load_u16 v0, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v65, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v84, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v82, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v69, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v80, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v67, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v68, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v64, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v39, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v49, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v48, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v51, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v50, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v53, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v52, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v54, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v55, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v19, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v21, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v0 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX11-NEXT: s_waitcnt vmcnt(12) +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v4 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_lshlrev_b32_e32 v29, 8, v6 +; GFX11-NEXT: s_waitcnt vmcnt(10) +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v8 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; 
GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v10 +; GFX11-NEXT: s_waitcnt vmcnt(8) +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v12 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v14 +; GFX11-NEXT: s_waitcnt vmcnt(6) +; GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v84 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB99_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_pack_ll_b32_b16 s5, s5, s6 +; GFX11-NEXT: s_pack_ll_b32_b16 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v35 +; GFX11-NEXT: s_pack_ll_b32_b16 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s10, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s11, s29, 8 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v39 +; GFX11-NEXT: s_or_b32 s10, s10, s11 +; GFX11-NEXT: s_and_b32 s9, s26, 0xff +; GFX11-NEXT: v_and_b32_e64 v1, 0xffff, s10 +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v38 +; GFX11-NEXT: s_or_b32 s9, s9, s12 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v31 +; GFX11-NEXT: s_pack_ll_b32_b16 s8, s8, s9 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v37 +; GFX11-NEXT: v_lshl_or_b32 v4, v0, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v48 +; GFX11-NEXT: v_and_b32_e32 
v6, 0xff, v36 +; GFX11-NEXT: v_or_b32_e32 v1, v2, v49 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v32 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v51 +; GFX11-NEXT: v_or_b32_e32 v7, v5, v50 +; GFX11-NEXT: v_or_b32_e32 v8, v6, v52 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v53 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v24 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v68 +; GFX11-NEXT: v_lshl_or_b32 v5, v3, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v16 +; GFX11-NEXT: v_lshl_or_b32 v6, v7, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v18 +; GFX11-NEXT: v_lshl_or_b32 v7, v8, 16, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v54 +; GFX11-NEXT: v_or_b32_e32 v2, v3, v17 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v20 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v22 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v55 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v67 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v19 +; GFX11-NEXT: v_or_b32_e32 v10, v8, v23 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v8, v2, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v9, v21 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshl_or_b32 v9, v3, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v30 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v80 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v82 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v28 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v27 +; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v10 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v64 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v66 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v70 +; GFX11-NEXT: v_or_b32_e32 v15, v12, v71 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v69 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v83 +; GFX11-NEXT: v_and_b32_e32 v86, 0xff, v65 
+; GFX11-NEXT: v_or_b32_e32 v14, v14, v84 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v25 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v29 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_and_b32_e32 v87, 0xffff, v11 +; GFX11-NEXT: v_or_b32_e32 v96, v12, v81 +; GFX11-NEXT: v_and_b32_e32 v97, 0xffff, v13 +; GFX11-NEXT: v_or_b32_e32 v86, v86, v85 +; GFX11-NEXT: v_and_b32_e32 v98, 0xffff, v14 +; GFX11-NEXT: v_lshl_or_b32 v11, v0, 16, v2 +; GFX11-NEXT: v_lshl_or_b32 v12, v1, 16, v3 +; GFX11-NEXT: v_lshl_or_b32 v13, v15, 16, v87 +; GFX11-NEXT: v_lshl_or_b32 v14, v96, 16, v97 +; GFX11-NEXT: v_lshl_or_b32 v15, v86, 16, v98 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB99_3 +; GFX11-NEXT: .LBB99_2: ; %cmp.true +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v68 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v67 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v64 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v22 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v16 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_or_b32_e32 v4, v70, v4 +; GFX11-NEXT: v_or_b32_e32 v5, v71, v5 +; GFX11-NEXT: s_and_b32 s4, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s29, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v4, v66, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v26 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v28 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v5 +; GFX11-NEXT: 
v_or_b32_e32 v5, v29, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v24 +; GFX11-NEXT: v_or_b32_e32 v4, v27, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v25, v6 +; GFX11-NEXT: v_or_b32_e32 v6, v23, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v34 +; GFX11-NEXT: v_or_b32_e32 v5, v21, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v20 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: s_and_b32 s5, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s25, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v55, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v32 +; GFX11-NEXT: v_or_b32_e32 v5, v19, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v4, v54, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v17, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v19, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v53, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v33 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: s_and_b32 s6, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s27, 8 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_or_b32 s6, s7, s6 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v31 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v37 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 0x300, v4 +; GFX11-NEXT: 
v_and_b32_e32 v4, 0xff, v7 +; GFX11-NEXT: s_or_b32 s7, s8, s7 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s8, s9, s8 +; GFX11-NEXT: s_and_b32 s9, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_or_b32 s9, s10, s9 +; GFX11-NEXT: s_and_b32 s10, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s11, s19, 8 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: v_or_b32_e32 v5, v52, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v38 +; GFX11-NEXT: v_or_b32_e32 v4, v51, v4 +; GFX11-NEXT: s_or_b32 s10, s11, s10 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: s_addk_i32 s6, 0x300 +; GFX11-NEXT: s_addk_i32 s9, 0x300 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_addk_i32 s10, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v50, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v49, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v35 +; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX11-NEXT: s_pack_ll_b32_b16 s1, s9, s10 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v82 +; GFX11-NEXT: s_pack_ll_b32_b16 s3, s5, s6 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v80 +; GFX11-NEXT: s_addk_i32 s7, 0x300 +; GFX11-NEXT: s_addk_i32 s8, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v65 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: s_pack_ll_b32_b16 s2, s7, s8 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v69 +; 
GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_or_b32_e32 v0, v84, v0 +; GFX11-NEXT: v_or_b32_e32 v2, v83, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v5 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v48, v8 +; GFX11-NEXT: v_or_b32_e32 v4, v39, v4 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_or_b32_e32 v1, v85, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v3, v81, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_and_b32_e64 v8, 0xffff, s4 +; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_lshl_or_b32 v4, v4, 16, v8 +; GFX11-NEXT: v_lshl_or_b32 v5, v7, 16, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v20 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v21 +; GFX11-NEXT: v_and_b32_e32 v17, 0xffff, v17 +; GFX11-NEXT: v_and_b32_e32 v20, 0xffff, v6 +; GFX11-NEXT: v_lshl_or_b32 v9, v9, 16, v16 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 +; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v11 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v6, v23, 16, v7 +; GFX11-NEXT: v_lshl_or_b32 v7, v22, 16, v8 +; GFX11-NEXT: v_lshl_or_b32 v8, v19, 16, v17 +; GFX11-NEXT: v_lshl_or_b32 v10, v10, 16, v20 +; GFX11-NEXT: v_lshl_or_b32 v11, v18, 16, v15 +; GFX11-NEXT: v_lshl_or_b32 v12, v14, 16, v12 +; GFX11-NEXT: v_lshl_or_b32 v13, v13, 16, v16 +; GFX11-NEXT: v_lshl_or_b32 v14, v3, 16, v2 +; GFX11-NEXT: v_lshl_or_b32 v15, v1, 16, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: 
.LBB99_3: ; %end +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB99_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: s_branch .LBB99_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -80688,45 +78798,45 @@ define <32 x half> @bitcast_v64i8_to_v32f16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v55, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v55, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v53, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v52, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v54, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v66, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v54, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:64 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v51, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:92 +; GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v86, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 
offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v70, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v29.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v27.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v25.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v23.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v23.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v19.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v17.l @@ -80752,47 +78862,42 @@ define <32 x half> @bitcast_v64i8_to_v32f16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v25.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v26.h, 8, v26.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v30.h, 8, v30.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.l, 8, v31.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v28.h, 8, v28.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.l, 8, v29.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v29.h -; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v29.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v55.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v55.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v53.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v80.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v39.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v53.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v52.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v54.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v39.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.h, 8, v48.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.h, 8, v81.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v37.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v48.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.h, 8, v82.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v50.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v83.l +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v34.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v35.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v36.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v38.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, v50.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v84.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v51.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v85.l ; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v52.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.l, 8, v86.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v54.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v87.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v64.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v96.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v64.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v97.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v65.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v66 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v98.l +; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB106_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -80820,22 +78925,22 @@ define <32 x half> @bitcast_v64i8_to_v32f16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v34.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v31.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v31.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, 
v33.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v68.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v55.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v65.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v69.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v66.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v67.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v64.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v54.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v70.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v50.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v52.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v49.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v51.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v48.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v23.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v23.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v20.l @@ -80847,27 +78952,27 @@ define <32 x half> @bitcast_v64i8_to_v32f16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v25.h ; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v26.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v30.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v39.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v31.l ; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, v28.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v29.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v49.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v33.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v29.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v49.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v39.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v48.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v48.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v51.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v52.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v50.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v50.h -; 
GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v54.l -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v51.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v54.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v53.l -; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v53.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v55.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v55.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v33.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v32.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v32.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v35.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v36.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v34.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v34.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v38.l +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v35.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v38.h +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v37.l +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v37.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v39.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v39.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16 @@ -80884,22 +78989,22 @@ define <32 x half> @bitcast_v64i8_to_v32f16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr23_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr23_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr20_lo16 @@ -80911,46 +79016,46 @@ define <32 x half> @bitcast_v64i8_to_v32f16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr25_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; 
implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB106_2 ; GFX11-TRUE16-NEXT: .LBB106_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v33.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v31.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v51.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v48.l, 3 
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v52.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v50.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.l, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.l, v71.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v55.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v55.h, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v53.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v39.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v39.h, v0.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v70.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v37.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v37.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v2.h @@ -80958,46 +79063,46 @@ define <32 x half> @bitcast_v64i8_to_v32f16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v52.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v54.l, v0.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v64.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v36.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v38.l, v0.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v37.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v69.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, 
0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v54.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v67.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v54.h, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v50.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v38.h, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v34.h, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v51.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v35.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v36.l, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v51.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v50.l, v0.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v66.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v35.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v34.l, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v35.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v68.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.l, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v34.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v34.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v55.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v52.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v49.h, v1.l -; 
GFX11-TRUE16-NEXT: v_or_b16 v0.l, v48.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v48.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v39.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v36.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v33.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v32.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v32.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v31.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v28.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.l, 0x300, v0.l @@ -81013,7 +79118,7 @@ define <32 x half> @bitcast_v64i8_to_v32f16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v49.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v33.l, v2.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v29.h, v0.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v28.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v29.l, v1.l @@ -81033,7 +79138,7 @@ define <32 x half> @bitcast_v64i8_to_v32f16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v39.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v31.l, v2.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v25.h, v0.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v26.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v27.h, v1.l @@ -82736,695 +80841,350 @@ define inreg <32 x half> @bitcast_v64i8_to_v32f16_scalar(<64 x i8> inreg %a, i32 ; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX9-NEXT: s_branch .LBB107_2 ; -; GFX11-TRUE16-LABEL: bitcast_v64i8_to_v32f16_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v31, v8 -; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v33, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v10 :: v_dual_mov_b32 v35, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v4 :: v_dual_mov_b32 v37, v2 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v39, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v49, 8, v3 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v48, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v51, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v50, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v53, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v52, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v54, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 8, v25 -; GFX11-TRUE16-NEXT: 
v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v70, 8, v8 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v14 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v84 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB107_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; 
GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v35 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s11, s29, 8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v39 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s10, s11 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-TRUE16-NEXT: v_and_b32_e64 v1, 0xffff, s10 -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v38 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v31 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s8, s9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v37 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v36 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v2, v49 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v51 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v5, v50 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v6, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xff, v24 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v68 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v16 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v7, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v18 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v8, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v3, v17 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v3, 0xff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v55 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v19 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v8, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v2, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v9, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v3, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v80 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v82 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v27 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v1, 16, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v65 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v12, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v69 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v86, 0xff, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v84 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v25 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v87, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v96, v12, v81 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v97, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v86, v86, v85 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v98, 0xffff, v14 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v0, 16, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v1, 16, v3 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v15, 16, v87 -; GFX11-TRUE16-NEXT: 
v_lshl_or_b32 v14, v96, 16, v97 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v86, 16, v98 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB107_3 -; GFX11-TRUE16-NEXT: .LBB107_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v68 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v67 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v65 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v16 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v70, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v71, v5 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v66, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v28 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v29, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v27, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v25, v6 -; GFX11-TRUE16-NEXT: 
v_or_b32_e32 v6, v23, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v21, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v55, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v19, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v54, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v17, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v53, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v33 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v31 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v37 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v4 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v4, 0xff, v7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v52, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v38 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v51, v4 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v50, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v49, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v35 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v82 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v80 -; GFX11-TRUE16-NEXT: s_addk_i32 s7, 
0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v64 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v84, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v83, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v48, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v39, v4 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v85, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v9, 16, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v23, 16, v7 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v22, 16, v8 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v19, 16, v17 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v10, 16, v20 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v18, 16, v15 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v14, 16, v12 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v13, 16, v16 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v3, 16, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB107_3: ; %end -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB107_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_branch .LBB107_2 -; -; GFX11-FAKE16-LABEL: bitcast_v64i8_to_v32f16_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v31, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v33, v6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v10 :: v_dual_mov_b32 v35, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v4 :: v_dual_mov_b32 v37, v2 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v65, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 
offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v84, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v82, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v69, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v80, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v67, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v68, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v64, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v49, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v48, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v51, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v53, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v52, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v55, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 8, v6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v8 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 
v81, 8, v14 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v84 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB107_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v35 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s11, s29, 8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v39 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s10, s11 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-FAKE16-NEXT: v_and_b32_e64 v1, 0xffff, s10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v38 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v31 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s8, s8, s9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 
-; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v49 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v51 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v5, v50 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v6, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v24 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v68 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v7, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v18 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v8, 16, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v3, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v55 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v67 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v8, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v2, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v9, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v3, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v80 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v82 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v28 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v2, v2, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v1, 16, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v12, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v69 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v86, 0xff, v65 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v84 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v87, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v96, v12, v81 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v97, 0xffff, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v86, v86, v85 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v98, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v0, 16, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v1, 16, v3 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v15, 16, v87 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v96, 16, v97 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v86, 16, v98 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB107_3 -; GFX11-FAKE16-NEXT: .LBB107_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v68 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v67 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v64 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 
-; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v16 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v70, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v71, v5 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v66, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v28 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v29, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v27, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v25, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v23, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v21, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v55, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v19, v5 
-; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v54, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v17, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v53, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v33 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v52, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 
-; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v51, v4 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v50, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v49, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v35 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v82 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v80 -; GFX11-FAKE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v84, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v83, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v48, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v39, v4 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v85, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 
v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v9, 16, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v23, 16, v7 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v22, 16, v8 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v19, 16, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v10, 16, v20 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v18, 16, v15 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v14, 16, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v13, 16, v16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v3, 16, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB107_3: ; %end -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB107_4: -; GFX11-FAKE16-NEXT: ; implicit-def: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: s_branch .LBB107_2 +; GFX11-LABEL: bitcast_v64i8_to_v32f16_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v31, v8 +; GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v33, v6 +; GFX11-NEXT: v_dual_mov_b32 v32, v10 :: v_dual_mov_b32 v35, v0 +; GFX11-NEXT: v_dual_mov_b32 v38, v4 :: v_dual_mov_b32 v37, v2 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v0, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v65, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v84, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v82, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v69, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v80, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v67, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v68, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v64, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v39, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v49, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v48, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v51, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v50, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v53, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v52, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v54, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v55, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v19, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 
v21, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v0 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX11-NEXT: s_waitcnt vmcnt(12) +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v4 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_lshlrev_b32_e32 v29, 8, v6 +; GFX11-NEXT: s_waitcnt vmcnt(10) +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v8 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v10 +; GFX11-NEXT: s_waitcnt vmcnt(8) +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v12 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v14 +; GFX11-NEXT: s_waitcnt vmcnt(6) +; GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v84 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB107_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_pack_ll_b32_b16 s5, s5, s6 +; GFX11-NEXT: s_pack_ll_b32_b16 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v35 +; GFX11-NEXT: s_pack_ll_b32_b16 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s10, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s11, s29, 8 
+; GFX11-NEXT: v_or_b32_e32 v0, v0, v39 +; GFX11-NEXT: s_or_b32 s10, s10, s11 +; GFX11-NEXT: s_and_b32 s9, s26, 0xff +; GFX11-NEXT: v_and_b32_e64 v1, 0xffff, s10 +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v38 +; GFX11-NEXT: s_or_b32 s9, s9, s12 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v31 +; GFX11-NEXT: s_pack_ll_b32_b16 s8, s8, s9 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v37 +; GFX11-NEXT: v_lshl_or_b32 v4, v0, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v48 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v36 +; GFX11-NEXT: v_or_b32_e32 v1, v2, v49 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v32 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v51 +; GFX11-NEXT: v_or_b32_e32 v7, v5, v50 +; GFX11-NEXT: v_or_b32_e32 v8, v6, v52 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v53 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v24 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v68 +; GFX11-NEXT: v_lshl_or_b32 v5, v3, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v16 +; GFX11-NEXT: v_lshl_or_b32 v6, v7, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v18 +; GFX11-NEXT: v_lshl_or_b32 v7, v8, 16, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v54 +; GFX11-NEXT: v_or_b32_e32 v2, v3, v17 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v20 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v22 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v55 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v67 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v19 +; GFX11-NEXT: v_or_b32_e32 v10, v8, v23 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v8, v2, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v9, v21 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshl_or_b32 v9, v3, 16, v0 +; GFX11-NEXT: 
v_and_b32_e32 v3, 0xff, v30 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v80 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v82 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v28 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v27 +; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v10 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v64 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v66 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v70 +; GFX11-NEXT: v_or_b32_e32 v15, v12, v71 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v69 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v83 +; GFX11-NEXT: v_and_b32_e32 v86, 0xff, v65 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v84 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v25 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v29 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_and_b32_e32 v87, 0xffff, v11 +; GFX11-NEXT: v_or_b32_e32 v96, v12, v81 +; GFX11-NEXT: v_and_b32_e32 v97, 0xffff, v13 +; GFX11-NEXT: v_or_b32_e32 v86, v86, v85 +; GFX11-NEXT: v_and_b32_e32 v98, 0xffff, v14 +; GFX11-NEXT: v_lshl_or_b32 v11, v0, 16, v2 +; GFX11-NEXT: v_lshl_or_b32 v12, v1, 16, v3 +; GFX11-NEXT: v_lshl_or_b32 v13, v15, 16, v87 +; GFX11-NEXT: v_lshl_or_b32 v14, v96, 16, v97 +; GFX11-NEXT: v_lshl_or_b32 v15, v86, 16, v98 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB107_3 +; GFX11-NEXT: .LBB107_2: ; %cmp.true +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v68 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v67 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v64 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v22 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v16 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: 
v_or_b32_e32 v4, v70, v4 +; GFX11-NEXT: v_or_b32_e32 v5, v71, v5 +; GFX11-NEXT: s_and_b32 s4, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s29, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v4, v66, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v26 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v28 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v29, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v24 +; GFX11-NEXT: v_or_b32_e32 v4, v27, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v25, v6 +; GFX11-NEXT: v_or_b32_e32 v6, v23, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v34 +; GFX11-NEXT: v_or_b32_e32 v5, v21, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v20 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: s_and_b32 s5, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s25, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v55, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v32 +; GFX11-NEXT: v_or_b32_e32 v5, v19, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v4, v54, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v17, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v19, 
0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v53, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v33 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: s_and_b32 s6, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s27, 8 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_or_b32 s6, s7, s6 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v31 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v37 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v7 +; GFX11-NEXT: s_or_b32 s7, s8, s7 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s8, s9, s8 +; GFX11-NEXT: s_and_b32 s9, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_or_b32 s9, s10, s9 +; GFX11-NEXT: s_and_b32 s10, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s11, s19, 8 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: v_or_b32_e32 v5, v52, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v38 +; GFX11-NEXT: v_or_b32_e32 v4, v51, v4 +; GFX11-NEXT: s_or_b32 s10, s11, s10 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: s_addk_i32 s6, 0x300 +; GFX11-NEXT: s_addk_i32 s9, 0x300 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_addk_i32 s10, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v50, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v49, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v4 +; 
GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v35 +; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX11-NEXT: s_pack_ll_b32_b16 s1, s9, s10 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v82 +; GFX11-NEXT: s_pack_ll_b32_b16 s3, s5, s6 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v80 +; GFX11-NEXT: s_addk_i32 s7, 0x300 +; GFX11-NEXT: s_addk_i32 s8, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v65 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: s_pack_ll_b32_b16 s2, s7, s8 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v69 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_or_b32_e32 v0, v84, v0 +; GFX11-NEXT: v_or_b32_e32 v2, v83, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v5 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v48, v8 +; GFX11-NEXT: v_or_b32_e32 v4, v39, v4 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_or_b32_e32 v1, v85, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v3, v81, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_and_b32_e64 v8, 0xffff, s4 +; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_lshl_or_b32 v4, v4, 16, v8 +; GFX11-NEXT: v_lshl_or_b32 v5, v7, 16, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v20 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v21 +; GFX11-NEXT: v_and_b32_e32 v17, 0xffff, v17 +; GFX11-NEXT: v_and_b32_e32 v20, 0xffff, v6 +; GFX11-NEXT: v_lshl_or_b32 v9, v9, 16, v16 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 +; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v11 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; 
GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v6, v23, 16, v7 +; GFX11-NEXT: v_lshl_or_b32 v7, v22, 16, v8 +; GFX11-NEXT: v_lshl_or_b32 v8, v19, 16, v17 +; GFX11-NEXT: v_lshl_or_b32 v10, v10, 16, v20 +; GFX11-NEXT: v_lshl_or_b32 v11, v18, 16, v15 +; GFX11-NEXT: v_lshl_or_b32 v12, v14, 16, v12 +; GFX11-NEXT: v_lshl_or_b32 v13, v13, 16, v16 +; GFX11-NEXT: v_lshl_or_b32 v14, v3, 16, v2 +; GFX11-NEXT: v_lshl_or_b32 v15, v1, 16, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB107_3: ; %end +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB107_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: s_branch .LBB107_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -91195,45 +88955,45 @@ define <32 x bfloat> @bitcast_v64i8_to_v32bf16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v55, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v55, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v53, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v31, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v53, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v32, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v52, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v54, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v66, off, s32 offset:132 -; 
GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v39, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v48, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v48, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v50, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v50, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v51, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v52, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v54, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v64, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v38, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v36, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v38, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v35, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v37, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v36, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v37, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v34, off, s32 offset:20 +; GFX11-TRUE16-NEXT: scratch_load_u16 v80, off, s32 offset:128 +; GFX11-TRUE16-NEXT: scratch_load_u16 v48, off, s32 offset:124 +; GFX11-TRUE16-NEXT: scratch_load_u16 v39, off, s32 offset:120 +; GFX11-TRUE16-NEXT: scratch_load_u16 v51, off, s32 offset:116 +; GFX11-TRUE16-NEXT: scratch_load_u16 v81, off, s32 offset:112 +; GFX11-TRUE16-NEXT: scratch_load_u16 v49, off, s32 offset:108 +; GFX11-TRUE16-NEXT: scratch_load_u16 v37, off, s32 offset:104 +; GFX11-TRUE16-NEXT: scratch_load_u16 v52, off, s32 offset:100 +; GFX11-TRUE16-NEXT: scratch_load_u16 v82, off, s32 offset:96 +; GFX11-TRUE16-NEXT: scratch_load_u16 v50, off, s32 offset:92 +; 
GFX11-TRUE16-NEXT: scratch_load_u16 v83, off, s32 offset:88 +; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:132 +; GFX11-TRUE16-NEXT: scratch_load_u16 v34, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v35, off, s32 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u16 v36, off, s32 offset:16 +; GFX11-TRUE16-NEXT: scratch_load_u16 v38, off, s32 offset:24 +; GFX11-TRUE16-NEXT: scratch_load_u16 v84, off, s32 offset:32 +; GFX11-TRUE16-NEXT: scratch_load_u16 v85, off, s32 offset:40 +; GFX11-TRUE16-NEXT: scratch_load_u16 v86, off, s32 offset:48 +; GFX11-TRUE16-NEXT: scratch_load_u16 v87, off, s32 offset:56 +; GFX11-TRUE16-NEXT: scratch_load_u16 v96, off, s32 offset:64 +; GFX11-TRUE16-NEXT: scratch_load_u16 v97, off, s32 offset:72 +; GFX11-TRUE16-NEXT: scratch_load_u16 v98, off, s32 offset:80 +; GFX11-TRUE16-NEXT: scratch_load_u16 v70, off, s32 offset:84 +; GFX11-TRUE16-NEXT: scratch_load_u16 v54, off, s32 offset:76 +; GFX11-TRUE16-NEXT: scratch_load_u16 v71, off, s32 offset:68 +; GFX11-TRUE16-NEXT: scratch_load_u16 v64, off, s32 offset:60 +; GFX11-TRUE16-NEXT: scratch_load_u16 v67, off, s32 offset:52 +; GFX11-TRUE16-NEXT: scratch_load_u16 v66, off, s32 offset:44 +; GFX11-TRUE16-NEXT: scratch_load_u16 v69, off, s32 offset:36 +; GFX11-TRUE16-NEXT: scratch_load_u16 v53, off, s32 offset:28 +; GFX11-TRUE16-NEXT: scratch_load_u16 v65, off, s32 offset:20 ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v33, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:4 +; GFX11-TRUE16-NEXT: scratch_load_u16 v55, off, s32 offset:12 +; GFX11-TRUE16-NEXT: scratch_load_u16 v68, off, s32 offset:4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v29.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v27.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v25.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v23.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.l, v23.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v21.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, 
v19.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v17.l @@ -91259,47 +89019,42 @@ define <32 x bfloat> @bitcast_v64i8_to_v32bf16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v25.h, 8, v25.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v26.h, 8, v26.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v30.h, 8, v30.h -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v39.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.l, 8, v31.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v28.h, 8, v28.h ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.l, 8, v29.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.l, 8, v29.h -; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.l, 8, v29.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(33) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.h, 8, v55.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v55.l, 8, v55.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.h, 8, v53.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v80.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.l, 8, v39.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v53.l, 8, v53.l -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.h, 8, v52.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.h, 8, v54.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v39.h -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v49.h, 8, v48.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.h, 8, v81.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v37.l, 8, v37.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v39.h, 8, v48.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.h, 8, v82.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.l, 8, v50.l +; 
GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.h, 8, v83.l +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v29.h, 8, v34.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v33.h, 8, v35.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v31.h, 8, v36.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.l, 8, v38.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v48.h, 8, v50.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v32.h, 8, v84.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.h, 8, v51.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.h, 8, v85.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v52.l, 8, v52.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v36.l, 8, v86.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.l, 8, v54.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.l, 8, v87.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v50.h, 8, v64.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v34.h, 8, v96.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v54.l, 8, v64.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v38.l, 8, v97.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v51.l, 8, v65.l -; GFX11-TRUE16-NEXT: v_cmpx_ne_u32_e32 0, v66 +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v35.l, 8, v98.l +; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB110_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %Flow @@ -91327,22 +89082,22 @@ define <32 x bfloat> @bitcast_v64i8_to_v32bf16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v28.l ; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v30.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v35.l -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v33.h -; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v34.h 
-; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v34.l -; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v37.h -; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v36.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v37.l -; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v35.h -; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v38.l -; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v36.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v38.h -; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v31.l -; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v32.h -; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v31.h -; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v33.l -; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v68.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v55.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v65.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v53.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v69.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v66.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.l, 0xff, v67.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v64.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.l, 0xff, v71.l +; GFX11-TRUE16-NEXT: v_and_b16 v12.h, 0xff, v54.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.l, 0xff, v70.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v50.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v52.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.h, 0xff, v49.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.l, 0xff, v51.l +; GFX11-TRUE16-NEXT: v_and_b16 v15.h, 0xff, v48.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v23.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v0.h, v23.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v1.l, v20.l @@ -91354,27 +89109,27 @@ define <32 x bfloat> @bitcast_v64i8_to_v32bf16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v4.l, v25.h ; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v4.h, v26.h ; GFX11-TRUE16-NEXT: v_or_b16 v5.l, v5.l, v30.h -; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v39.l +; GFX11-TRUE16-NEXT: v_or_b16 v5.h, v5.h, v31.l ; GFX11-TRUE16-NEXT: v_or_b16 v6.l, v6.l, 
v28.h ; GFX11-TRUE16-NEXT: v_or_b16 v6.h, v6.h, v29.l -; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v49.l +; GFX11-TRUE16-NEXT: v_or_b16 v7.l, v7.l, v33.l ; GFX11-TRUE16-NEXT: v_or_b16 v7.h, v7.h, v29.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v49.h -; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v39.h -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v48.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v48.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v51.h -; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v52.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v50.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v50.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v54.l -; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v51.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v54.h -; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v52.h -; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v53.l -; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v53.h -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v55.l -; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v55.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.l, v8.l, v33.h +; GFX11-TRUE16-NEXT: v_or_b16 v8.h, v8.h, v31.h +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v9.l, v32.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.h, v9.h, v32.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v10.l, v35.h +; GFX11-TRUE16-NEXT: v_or_b16 v10.h, v10.h, v36.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v11.l, v34.l +; GFX11-TRUE16-NEXT: v_or_b16 v11.h, v11.h, v34.h +; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v12.l, v38.l +; GFX11-TRUE16-NEXT: v_or_b16 v12.h, v12.h, v35.l +; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v13.l, v38.h +; GFX11-TRUE16-NEXT: v_or_b16 v13.h, v13.h, v36.h +; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v14.l, v37.l +; GFX11-TRUE16-NEXT: v_or_b16 v14.h, v14.h, v37.h +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v15.l, v39.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.h, v15.h, v39.h ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr18_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr19_lo16 @@ -91391,22 +89146,22 @@ define 
<32 x bfloat> @bitcast_v64i8_to_v32bf16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr71 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr64 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr67 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr69 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr65 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr68 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr23_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr23_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr20_lo16 @@ -91418,46 +89173,46 @@ define <32 x bfloat> @bitcast_v64i8_to_v32bf16(<64 x i8> 
%a, i32 %b) { ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr25_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr26_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr30_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_hi16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr29_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr49_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr33_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr32_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr34_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr35_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr38_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_lo16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr37_hi16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_lo16 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr39_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr48_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr50_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr51_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr54_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr52_hi16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr53_hi16 -; 
GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_lo16 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_hi16 ; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB110_2 ; GFX11-TRUE16-NEXT: .LBB110_4: ; %cmp.true -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v33.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v32.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v32.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v31.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v31.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v51.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v48.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v52.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v49.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.h, v50.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.l, v38.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v3.l, v71.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v55.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v55.h, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v38.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v53.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v53.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v39.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v39.h, v0.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v70.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v37.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v37.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.l, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v15.h, 0x300, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v2.h @@ -91465,46 +89220,46 @@ define <32 x bfloat> @bitcast_v64i8_to_v32bf16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.l, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v14.h, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_add_nc_u16 
v1.l, v35.h, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v52.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v54.l, v0.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v64.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v36.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v38.l, v0.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v37.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v69.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.l, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v36.h, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v37.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v54.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v67.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v54.h, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v50.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v38.h, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v34.h, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v51.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v35.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v13.l, 0x300, v2.l -; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v36.l, 3 -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v51.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v50.l, v0.h +; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v66.l, 3 +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v35.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v34.l, v0.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.h, 0x300, v1.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.l, 0x300, v1.h ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v35.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.l, v68.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v12.h, 0x300, v0.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v11.l, 0x300, v0.h -; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v34.h, 3 -; 
GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v34.l, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v33.h, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v65.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.h, v53.l, 3 +; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v55.l, 3 ; GFX11-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v52.l, v2.l -; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v49.h, v1.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v48.l, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v48.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v39.h, v1.h +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v36.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v33.h, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v32.l, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v32.h, v0.h +; GFX11-TRUE16-NEXT: v_or_b16 v1.h, v31.h, v1.h ; GFX11-TRUE16-NEXT: v_add_nc_u16 v10.h, 0x300, v2.l ; GFX11-TRUE16-NEXT: v_add_nc_u16 v2.l, v28.l, 3 ; GFX11-TRUE16-NEXT: v_add_nc_u16 v9.l, 0x300, v0.l @@ -91520,7 +89275,7 @@ define <32 x bfloat> @bitcast_v64i8_to_v32bf16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v49.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v33.l, v2.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v29.h, v0.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v28.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v29.l, v1.l @@ -91540,7 +89295,7 @@ define <32 x bfloat> @bitcast_v64i8_to_v32bf16(<64 x i8> %a, i32 %b) { ; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v1.h -; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v39.l, v2.l +; GFX11-TRUE16-NEXT: v_or_b16 v2.l, v31.l, v2.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v25.h, v0.l ; 
GFX11-TRUE16-NEXT: v_or_b16 v0.h, v26.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v1.l, v27.h, v1.l @@ -93247,695 +91002,350 @@ define inreg <32 x bfloat> @bitcast_v64i8_to_v32bf16_scalar(<64 x i8> inreg %a, ; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX9-NEXT: s_branch .LBB111_2 ; -; GFX11-TRUE16-LABEL: bitcast_v64i8_to_v32bf16_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v31, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v33, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v10 :: v_dual_mov_b32 v35, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, v4 :: v_dual_mov_b32 v37, v2 -; GFX11-TRUE16-NEXT: s_clause 0xf -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v64, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v4, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v6, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v8, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v10, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v12, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v14, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v84, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v82, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v69, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v80, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v67, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v68, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v65, off, s32 offset:4 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v39, 8, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v49, 8, v3 -; 
GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v48, 8, v5 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v51, 8, v7 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v50, 8, v9 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v53, 8, v11 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v52, 8, v13 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v54, 8, v15 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 8, v17 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 8, v19 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 8, v21 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 8, v23 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 8, v25 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 8, v29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v85, 8, v0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v66, 8, v4 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 8, v6 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v70, 8, v8 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v71, 8, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v83, 8, v12 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 8, v14 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 8, v84 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB111_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s16, 0xff -; 
GFX11-TRUE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v35 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s11, s29, 8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v39 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s10, s11 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-TRUE16-NEXT: v_and_b32_e64 v1, 0xffff, s10 -; GFX11-TRUE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v38 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s9, s12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v31 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s8, s9 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v37 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v33 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v48 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v36 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v2, v49 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v51 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v5, v50 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, v6, v52 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v53 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v9, 0xff, v24 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xff, v68 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v16 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v7, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v18 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v8, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v54 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v3, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v22 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v55 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v67 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v19 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, v8, v23 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v2, 16, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v9, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v26 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v3, 16, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v30 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xff, v80 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xff, v82 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v28 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v2, v27 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v1, 16, v10 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v65 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v3, v66 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v11, v11, v70 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v12, v71 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xff, v69 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v13, v13, v83 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v86, 0xff, v64 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v14, v84 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v0, v25 
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v87, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v96, v12, v81 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v97, 0xffff, v13 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v86, v86, v85 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v98, 0xffff, v14 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v0, 16, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v1, 16, v3 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v15, 16, v87 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v96, 16, v97 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v86, 16, v98 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB111_3 -; GFX11-TRUE16-NEXT: .LBB111_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v68 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v67 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 3, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v65 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v22 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v16 -; GFX11-TRUE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v70, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v71, v5 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v66, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v26 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v28 -; GFX11-TRUE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v29, v6 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v24 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v27, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v25, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v23, v7 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v18 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v34 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v21, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v55, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 3, v32 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v19, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v4 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v54, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v17, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-TRUE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 3, v36 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v53, v8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 3, v33 
-; GFX11-TRUE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-TRUE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, 3, v31 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v37 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-TRUE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-TRUE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s8, s9, s8 -; GFX11-TRUE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-TRUE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-TRUE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-TRUE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v52, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 3, v38 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v51, v4 -; GFX11-TRUE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-TRUE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-TRUE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-TRUE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s10, 0x300 -; 
GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v5 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v50, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v49, v8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 3, v35 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 3, v82 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 3, v80 -; GFX11-TRUE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-TRUE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 3, v64 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 3, v69 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v84, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v83, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v48, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v39, v4 -; GFX11-TRUE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v85, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; 
GFX11-TRUE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff, v20 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff, v21 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xffff, v6 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v9, v9, 16, v16 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff, v11 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v6, v23, 16, v7 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v7, v22, 16, v8 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v8, v19, 16, v17 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v10, v10, 16, v20 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v11, v18, 16, v15 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v12, v14, 16, v12 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v13, v13, 16, v16 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v14, v3, 16, v2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-TRUE16-NEXT: .LBB111_3: ; %end -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-TRUE16-NEXT: .LBB111_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-TRUE16-NEXT: s_branch .LBB111_2 -; -; GFX11-FAKE16-LABEL: bitcast_v64i8_to_v32bf16_scalar: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v31, v8 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v33, v6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v10 :: v_dual_mov_b32 v35, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v38, v4 :: 
v_dual_mov_b32 v37, v2 -; GFX11-FAKE16-NEXT: s_clause 0xf -; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_u16 v65, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v2, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u16 v4, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v6, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v8, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v10, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u16 v12, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u16 v14, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u16 v84, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u16 v82, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u16 v69, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u16 v80, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u16 v67, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u16 v68, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v64, off, s32 offset:4 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 8, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v49, 8, v3 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v48, 8, v5 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v51, 8, v7 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 8, v9 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v53, 8, v11 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v52, 8, v13 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 8, v15 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 8, v17 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v55, 8, v19 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 8, v21 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 8, v23 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 8, v25 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 8, v27 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 8, v29 -; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 8, v0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; 
GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 8, v4 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 8, v6 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 8, v8 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 8, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 8, v12 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v81, 8, v14 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v84, 8, v84 -; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB111_4 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: s_and_b32 s5, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s3, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s17, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s19, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s5, s5, s6 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s6, s7, s8 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s23, 8 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s25, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v35 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s7, s7, s8 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s10 -; GFX11-FAKE16-NEXT: 
s_and_b32 s10, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s11, s29, 8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v39 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s10, s11 -; GFX11-FAKE16-NEXT: s_and_b32 s9, s26, 0xff -; GFX11-FAKE16-NEXT: v_and_b32_e64 v1, 0xffff, s10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s12, s27, 8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v38 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s9, s12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v31 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s8, s8, s9 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v49 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v51 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v5, v50 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, v6, v52 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v53 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v24 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v68 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v3, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v7, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v18 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v8, 16, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v54 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v3, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v55 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v67 -; GFX11-FAKE16-NEXT: 
v_or_b32_e32 v3, v3, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, v8, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v2, 16, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v9, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v3, 16, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v30 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v80 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v82 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v28 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v2, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v1, 16, v10 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v64 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v3, v66 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v11, v11, v70 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v15, v12, v71 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v69 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, v13, v83 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v86, 0xff, v65 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, v14, v84 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v25 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v29 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v87, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v96, v12, v81 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v97, 0xffff, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v86, v86, v85 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v98, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v0, 16, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v1, 16, v3 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v15, 16, v87 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v96, 16, v97 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v86, 16, v98 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 -; GFX11-FAKE16-NEXT: 
s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB111_3 -; GFX11-FAKE16-NEXT: .LBB111_2: ; %cmp.true -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v68 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v67 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 3, v30 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v64 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v22 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v16 -; GFX11-FAKE16-NEXT: s_add_i32 s28, s28, 3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v70, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v71, v5 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s28, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s29, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s24, s24, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v66, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v26 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v28 -; GFX11-FAKE16-NEXT: s_or_b32 s4, s5, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v29, v6 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v24 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v27, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v25, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v6, v23, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x300, v5 -; GFX11-FAKE16-NEXT: 
v_add_nc_u32_e32 v8, 3, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v21, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s24, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s25, 8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v55, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 3, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v19, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x300, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v54, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v17, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v9 -; GFX11-FAKE16-NEXT: s_add_i32 s26, s26, 3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 3, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v53, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 3, v33 -; GFX11-FAKE16-NEXT: s_or_b32 s5, s6, s5 -; GFX11-FAKE16-NEXT: s_and_b32 s6, s26, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s27, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s20, s20, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s6, s7, s6 -; GFX11-FAKE16-NEXT: s_and_b32 s7, s20, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s8, s21, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s22, s22, 3 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 3, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v7 -; GFX11-FAKE16-NEXT: s_or_b32 s7, s8, s7 -; GFX11-FAKE16-NEXT: s_and_b32 s8, s22, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s9, s23, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s16, s16, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s8, s9, s8 -; 
GFX11-FAKE16-NEXT: s_and_b32 s9, s16, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s10, s17, 8 -; GFX11-FAKE16-NEXT: s_add_i32 s18, s18, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s0, s0, 3 -; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 3 -; GFX11-FAKE16-NEXT: s_or_b32 s9, s10, s9 -; GFX11-FAKE16-NEXT: s_and_b32 s10, s18, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s11, s19, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-FAKE16-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v52, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 3, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v51, v4 -; GFX11-FAKE16-NEXT: s_or_b32 s10, s11, s10 -; GFX11-FAKE16-NEXT: s_or_b32 s0, s1, s0 -; GFX11-FAKE16-NEXT: s_or_b32 s1, s3, s2 -; GFX11-FAKE16-NEXT: s_addk_i32 s5, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s6, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s9, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s0, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s1, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s10, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x300, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v50, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v49, v8 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x300, v4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 3, v35 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s0, s0, s1 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s1, s9, s10 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 3, v82 -; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s3, s5, s6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 3, v80 -; GFX11-FAKE16-NEXT: s_addk_i32 s7, 0x300 -; GFX11-FAKE16-NEXT: s_addk_i32 s8, 0x300 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 3, v65 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 
-; GFX11-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s7, s8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 3, v69 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v84, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v83, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x300, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, v48, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v39, v4 -; GFX11-FAKE16-NEXT: s_addk_i32 s4, 0x300 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v85, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v81, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 -; GFX11-FAKE16-NEXT: v_and_b32_e64 v8, 0xffff, s4 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v4, 16, v8 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v7, 16, v5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v20 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v6 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v9, 16, v16 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v23, 16, v7 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v22, 16, v8 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v19, 16, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v10, 16, v20 -; 
GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v18, 16, v15 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v14, 16, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v13, 16, v16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v3, 16, v2 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-FAKE16-NEXT: .LBB111_3: ; %end -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] -; GFX11-FAKE16-NEXT: .LBB111_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX11-FAKE16-NEXT: s_branch .LBB111_2 +; GFX11-LABEL: bitcast_v64i8_to_v32bf16_scalar: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v31, v8 +; GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v33, v6 +; GFX11-NEXT: v_dual_mov_b32 v32, v10 :: v_dual_mov_b32 v35, v0 +; GFX11-NEXT: v_dual_mov_b32 v38, v4 :: v_dual_mov_b32 v37, v2 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_load_u16 v0, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u16 v65, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u16 v4, off, s32 +; GFX11-NEXT: scratch_load_u16 v6, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v8, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v10, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u16 v12, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u16 v14, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u16 v84, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u16 v82, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u16 v69, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u16 v80, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u16 v67, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u16 v68, off, s32 offset:12 +; GFX11-NEXT: 
scratch_load_u16 v64, off, s32 offset:4 +; GFX11-NEXT: v_lshlrev_b32_e32 v39, 8, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v49, 8, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v48, 8, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v51, 8, v7 +; GFX11-NEXT: v_lshlrev_b32_e32 v50, 8, v9 +; GFX11-NEXT: v_lshlrev_b32_e32 v53, 8, v11 +; GFX11-NEXT: v_lshlrev_b32_e32 v52, 8, v13 +; GFX11-NEXT: v_lshlrev_b32_e32 v54, 8, v15 +; GFX11-NEXT: v_lshlrev_b32_e32 v17, 8, v17 +; GFX11-NEXT: v_lshlrev_b32_e32 v55, 8, v19 +; GFX11-NEXT: v_lshlrev_b32_e32 v19, 8, v21 +; GFX11-NEXT: v_lshlrev_b32_e32 v23, 8, v23 +; GFX11-NEXT: v_lshlrev_b32_e32 v21, 8, v25 +; GFX11-NEXT: v_lshlrev_b32_e32 v27, 8, v27 +; GFX11-NEXT: v_lshlrev_b32_e32 v25, 8, v29 +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: v_lshlrev_b32_e32 v85, 8, v0 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX11-NEXT: s_waitcnt vmcnt(12) +; GFX11-NEXT: v_lshlrev_b32_e32 v66, 8, v4 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: v_lshlrev_b32_e32 v29, 8, v6 +; GFX11-NEXT: s_waitcnt vmcnt(10) +; GFX11-NEXT: v_lshlrev_b32_e32 v70, 8, v8 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: v_lshlrev_b32_e32 v71, 8, v10 +; GFX11-NEXT: s_waitcnt vmcnt(8) +; GFX11-NEXT: v_lshlrev_b32_e32 v83, 8, v12 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: v_lshlrev_b32_e32 v81, 8, v14 +; GFX11-NEXT: s_waitcnt vmcnt(6) +; GFX11-NEXT: v_lshlrev_b32_e32 v84, 8, v84 +; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-NEXT: s_cbranch_scc0 .LBB111_4 +; GFX11-NEXT: ; %bb.1: ; %cmp.false +; GFX11-NEXT: s_and_b32 s5, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s1, 8 +; GFX11-NEXT: s_and_b32 s7, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s3, 8 +; GFX11-NEXT: s_or_b32 s5, s5, s6 +; GFX11-NEXT: s_or_b32 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s17, 8 +; GFX11-NEXT: s_and_b32 s9, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s19, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: 
s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_pack_ll_b32_b16 s5, s5, s6 +; GFX11-NEXT: s_pack_ll_b32_b16 s6, s7, s8 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_and_b32 s9, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s23, 8 +; GFX11-NEXT: s_or_b32 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s9, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s25, 8 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v35 +; GFX11-NEXT: s_pack_ll_b32_b16 s7, s7, s8 +; GFX11-NEXT: s_or_b32 s8, s9, s10 +; GFX11-NEXT: s_and_b32 s10, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s11, s29, 8 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v39 +; GFX11-NEXT: s_or_b32 s10, s10, s11 +; GFX11-NEXT: s_and_b32 s9, s26, 0xff +; GFX11-NEXT: v_and_b32_e64 v1, 0xffff, s10 +; GFX11-NEXT: s_lshl_b32 s12, s27, 8 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v38 +; GFX11-NEXT: s_or_b32 s9, s9, s12 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v31 +; GFX11-NEXT: s_pack_ll_b32_b16 s8, s8, s9 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v37 +; GFX11-NEXT: v_lshl_or_b32 v4, v0, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v33 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v48 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v36 +; GFX11-NEXT: v_or_b32_e32 v1, v2, v49 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v32 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v51 +; GFX11-NEXT: v_or_b32_e32 v7, v5, v50 +; GFX11-NEXT: v_or_b32_e32 v8, v6, v52 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v53 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v24 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v68 +; GFX11-NEXT: v_lshl_or_b32 v5, v3, 16, v1 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v34 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v16 +; GFX11-NEXT: v_lshl_or_b32 v6, v7, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v18 +; GFX11-NEXT: v_lshl_or_b32 v7, v8, 16, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v54 +; 
GFX11-NEXT: v_or_b32_e32 v2, v3, v17 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v20 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v22 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v55 +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v67 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v19 +; GFX11-NEXT: v_or_b32_e32 v10, v8, v23 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v8, v2, 16, v1 +; GFX11-NEXT: v_or_b32_e32 v1, v9, v21 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v26 +; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX11-NEXT: v_lshl_or_b32 v9, v3, 16, v0 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v30 +; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v80 +; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v82 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v28 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v27 +; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v10 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v64 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v66 +; GFX11-NEXT: v_or_b32_e32 v11, v11, v70 +; GFX11-NEXT: v_or_b32_e32 v15, v12, v71 +; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v69 +; GFX11-NEXT: v_or_b32_e32 v13, v13, v83 +; GFX11-NEXT: v_and_b32_e32 v86, 0xff, v65 +; GFX11-NEXT: v_or_b32_e32 v14, v14, v84 +; GFX11-NEXT: v_or_b32_e32 v0, v0, v25 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_or_b32_e32 v1, v1, v29 +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX11-NEXT: v_and_b32_e32 v87, 0xffff, v11 +; GFX11-NEXT: v_or_b32_e32 v96, v12, v81 +; GFX11-NEXT: v_and_b32_e32 v97, 0xffff, v13 +; GFX11-NEXT: v_or_b32_e32 v86, v86, v85 +; GFX11-NEXT: v_and_b32_e32 v98, 0xffff, v14 +; GFX11-NEXT: v_lshl_or_b32 v11, v0, 16, v2 +; GFX11-NEXT: v_lshl_or_b32 v12, v1, 16, v3 +; GFX11-NEXT: v_lshl_or_b32 v13, v15, 16, v87 +; GFX11-NEXT: v_lshl_or_b32 v14, v96, 16, v97 +; GFX11-NEXT: v_lshl_or_b32 v15, v86, 16, v98 +; GFX11-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 +; GFX11-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 +; GFX11-NEXT: 
s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-NEXT: s_cbranch_vccnz .LBB111_3 +; GFX11-NEXT: .LBB111_2: ; %cmp.true +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v68 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v67 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 3, v30 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v64 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v22 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v16 +; GFX11-NEXT: s_add_i32 s28, s28, 3 +; GFX11-NEXT: v_or_b32_e32 v4, v70, v4 +; GFX11-NEXT: v_or_b32_e32 v5, v71, v5 +; GFX11-NEXT: s_and_b32 s4, s28, 0xff +; GFX11-NEXT: s_lshl_b32 s5, s29, 8 +; GFX11-NEXT: s_add_i32 s24, s24, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v11, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v13, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v4, v66, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v26 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v28 +; GFX11-NEXT: s_or_b32 s4, s5, s4 +; GFX11-NEXT: v_add_nc_u32_e32 v12, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v29, v6 +; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v24 +; GFX11-NEXT: v_or_b32_e32 v4, v27, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v14, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v25, v6 +; GFX11-NEXT: v_or_b32_e32 v6, v23, v7 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v15, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v18 +; GFX11-NEXT: v_add_nc_u32_e32 v18, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v34 +; GFX11-NEXT: v_or_b32_e32 v5, v21, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v20 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: s_and_b32 s5, s24, 0xff +; GFX11-NEXT: s_lshl_b32 s6, s25, 8 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0x300, v5 +; 
GFX11-NEXT: v_and_b32_e32 v5, 0xff, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v55, v4 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 3, v32 +; GFX11-NEXT: v_or_b32_e32 v5, v19, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v16, 0x300, v4 +; GFX11-NEXT: v_or_b32_e32 v4, v54, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v17, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v9 +; GFX11-NEXT: s_add_i32 s26, s26, 3 +; GFX11-NEXT: v_add_nc_u32_e32 v9, 0x300, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 3, v36 +; GFX11-NEXT: v_add_nc_u32_e32 v17, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v19, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v4, v53, v8 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 3, v33 +; GFX11-NEXT: s_or_b32 s5, s6, s5 +; GFX11-NEXT: s_and_b32 s6, s26, 0xff +; GFX11-NEXT: s_lshl_b32 s7, s27, 8 +; GFX11-NEXT: s_add_i32 s20, s20, 3 +; GFX11-NEXT: s_or_b32 s6, s7, s6 +; GFX11-NEXT: s_and_b32 s7, s20, 0xff +; GFX11-NEXT: s_lshl_b32 s8, s21, 8 +; GFX11-NEXT: s_add_i32 s22, s22, 3 +; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX11-NEXT: v_add_nc_u32_e32 v8, 3, v31 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v37 +; GFX11-NEXT: v_add_nc_u32_e32 v21, 0x300, v4 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v7 +; GFX11-NEXT: s_or_b32 s7, s8, s7 +; GFX11-NEXT: s_and_b32 s8, s22, 0xff +; GFX11-NEXT: s_lshl_b32 s9, s23, 8 +; GFX11-NEXT: s_add_i32 s16, s16, 3 +; GFX11-NEXT: s_or_b32 s8, s9, s8 +; GFX11-NEXT: s_and_b32 s9, s16, 0xff +; GFX11-NEXT: s_lshl_b32 s10, s17, 8 +; GFX11-NEXT: s_add_i32 s18, s18, 3 +; GFX11-NEXT: s_add_i32 s0, s0, 3 +; GFX11-NEXT: s_add_i32 s2, s2, 3 +; GFX11-NEXT: s_or_b32 s9, s10, s9 +; GFX11-NEXT: s_and_b32 s10, s18, 0xff +; GFX11-NEXT: s_lshl_b32 s11, s19, 8 +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_lshl_b32 s1, s1, 8 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 +; GFX11-NEXT: v_or_b32_e32 v5, v52, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v20 +; 
GFX11-NEXT: v_add_nc_u32_e32 v20, 3, v38 +; GFX11-NEXT: v_or_b32_e32 v4, v51, v4 +; GFX11-NEXT: s_or_b32 s10, s11, s10 +; GFX11-NEXT: s_or_b32 s0, s1, s0 +; GFX11-NEXT: s_or_b32 s1, s3, s2 +; GFX11-NEXT: s_addk_i32 s5, 0x300 +; GFX11-NEXT: s_addk_i32 s6, 0x300 +; GFX11-NEXT: s_addk_i32 s9, 0x300 +; GFX11-NEXT: s_addk_i32 s0, 0x300 +; GFX11-NEXT: s_addk_i32 s1, 0x300 +; GFX11-NEXT: s_addk_i32 s10, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x300, v5 +; GFX11-NEXT: v_or_b32_e32 v5, v50, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v49, v8 +; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v20 +; GFX11-NEXT: v_add_nc_u32_e32 v20, 0x300, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 3, v35 +; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX11-NEXT: s_pack_ll_b32_b16 s1, s9, s10 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 3, v82 +; GFX11-NEXT: s_pack_ll_b32_b16 s3, s5, s6 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 3, v80 +; GFX11-NEXT: s_addk_i32 s7, 0x300 +; GFX11-NEXT: s_addk_i32 s8, 0x300 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v65 +; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX11-NEXT: s_pack_ll_b32_b16 s2, s7, s8 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 3, v69 +; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX11-NEXT: v_or_b32_e32 v0, v84, v0 +; GFX11-NEXT: v_or_b32_e32 v2, v83, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v23, 0x300, v5 +; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v5, 0x300, v7 +; GFX11-NEXT: v_or_b32_e32 v7, v48, v8 +; GFX11-NEXT: v_or_b32_e32 v4, v39, v4 +; GFX11-NEXT: s_addk_i32 s4, 0x300 +; GFX11-NEXT: v_or_b32_e32 v1, v85, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x300, v0 +; GFX11-NEXT: v_or_b32_e32 v3, v81, v3 +; GFX11-NEXT: v_add_nc_u32_e32 v2, 0x300, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x300, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x300, v7 +; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x300, v4 +; GFX11-NEXT: v_and_b32_e64 v8, 0xffff, s4 +; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX11-NEXT: 
v_and_b32_e32 v16, 0xffff, v16 +; GFX11-NEXT: v_add_nc_u32_e32 v1, 0x300, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v3, 0x300, v3 +; GFX11-NEXT: v_lshl_or_b32 v4, v4, 16, v8 +; GFX11-NEXT: v_lshl_or_b32 v5, v7, 16, v5 +; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v20 +; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v21 +; GFX11-NEXT: v_and_b32_e32 v17, 0xffff, v17 +; GFX11-NEXT: v_and_b32_e32 v20, 0xffff, v6 +; GFX11-NEXT: v_lshl_or_b32 v9, v9, 16, v16 +; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 +; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v11 +; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: v_lshl_or_b32 v6, v23, 16, v7 +; GFX11-NEXT: v_lshl_or_b32 v7, v22, 16, v8 +; GFX11-NEXT: v_lshl_or_b32 v8, v19, 16, v17 +; GFX11-NEXT: v_lshl_or_b32 v10, v10, 16, v20 +; GFX11-NEXT: v_lshl_or_b32 v11, v18, 16, v15 +; GFX11-NEXT: v_lshl_or_b32 v12, v14, 16, v12 +; GFX11-NEXT: v_lshl_or_b32 v13, v13, 16, v16 +; GFX11-NEXT: v_lshl_or_b32 v14, v3, 16, v2 +; GFX11-NEXT: v_lshl_or_b32 v15, v1, 16, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: .LBB111_3: ; %end +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-NEXT: .LBB111_4: +; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: s_branch .LBB111_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 49fe1eed9c514..68cca97497564 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -61,21 +61,13 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; GFX10-NEXT: global_store_short v[2:3], v0, off ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11TRUE16-LABEL: test_load_store: -; 
GFX11TRUE16: ; %bb.0: -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11TRUE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11FAKE16-LABEL: test_load_store: -; GFX11FAKE16: ; %bb.0: -; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_load_store: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b16 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] %val = load bfloat, ptr addrspace(1) %in store bfloat %val, ptr addrspace(1) %out ret void @@ -3652,21 +3644,13 @@ define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %ou ; GFX10-NEXT: global_store_short v[2:3], v0, off ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11TRUE16-LABEL: test_bitcast_from_bfloat: -; GFX11TRUE16: ; %bb.0: -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11TRUE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11FAKE16-LABEL: test_bitcast_from_bfloat: -; GFX11FAKE16: ; %bb.0: -; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_bitcast_from_bfloat: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; 
GFX11-NEXT: global_store_b16 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] %val = load bfloat, ptr addrspace(1) %in %val_int = bitcast bfloat %val to i16 store i16 %val_int, ptr addrspace(1) %out @@ -3726,21 +3710,13 @@ define void @test_bitcast_to_bfloat(ptr addrspace(1) %out, ptr addrspace(1) %in) ; GFX10-NEXT: global_store_short v[0:1], v2, off ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11TRUE16-LABEL: test_bitcast_to_bfloat: -; GFX11TRUE16: ; %bb.0: -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11TRUE16-NEXT: global_load_d16_b16 v2, v[2:3], off -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11TRUE16-NEXT: global_store_b16 v[0:1], v2, off -; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11FAKE16-LABEL: test_bitcast_to_bfloat: -; GFX11FAKE16: ; %bb.0: -; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11FAKE16-NEXT: global_load_u16 v2, v[2:3], off -; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v2, off -; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_bitcast_to_bfloat: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v2, v[2:3], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b16 v[0:1], v2, off +; GFX11-NEXT: s_setpc_b64 s[30:31] %val = load i16, ptr addrspace(1) %in %val_fp = bitcast i16 %val to bfloat store bfloat %val_fp, ptr addrspace(1) %out @@ -5676,23 +5652,14 @@ define bfloat @test_alloca_load_store_ret(bfloat %in) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11TRUE16-LABEL: test_alloca_load_store_ret: -; GFX11TRUE16: ; %bb.0: ; %entry -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc -; GFX11TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; 
GFX11FAKE16-LABEL: test_alloca_load_store_ret: -; GFX11FAKE16: ; %bb.0: ; %entry -; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc -; GFX11FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc -; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_alloca_load_store_ret: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_store_b16 off, v0, s32 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: scratch_load_u16 v0, off, s32 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %in.addr = alloca bfloat, align 2, addrspace(5) store volatile bfloat %in, ptr addrspace(5) %in.addr, align 2 @@ -45509,34 +45476,34 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX11TRUE16: ; %bb.0: ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11TRUE16-NEXT: s_clause 0x1f -; GFX11TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32 +; GFX11TRUE16-NEXT: scratch_load_u16 v31, off, s32 ; GFX11TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:68 ; GFX11TRUE16-NEXT: scratch_load_b32 v33, off, s32 offset:72 -; GFX11TRUE16-NEXT: scratch_load_b32 v34, off, s32 offset:124 -; GFX11TRUE16-NEXT: scratch_load_b32 v35, off, s32 offset:128 -; GFX11TRUE16-NEXT: scratch_load_b32 v36, off, s32 offset:64 -; GFX11TRUE16-NEXT: scratch_load_b32 v37, off, s32 offset:60 -; GFX11TRUE16-NEXT: scratch_load_b32 v38, off, s32 offset:120 -; GFX11TRUE16-NEXT: scratch_load_b32 v39, off, s32 offset:56 -; GFX11TRUE16-NEXT: scratch_load_b32 v48, off, s32 offset:116 -; GFX11TRUE16-NEXT: scratch_load_b32 v49, off, s32 offset:52 -; GFX11TRUE16-NEXT: scratch_load_b32 v50, off, s32 offset:112 -; GFX11TRUE16-NEXT: scratch_load_b32 v51, off, s32 offset:48 -; GFX11TRUE16-NEXT: scratch_load_b32 v52, off, s32 offset:108 -; 
GFX11TRUE16-NEXT: scratch_load_b32 v53, off, s32 offset:44 -; GFX11TRUE16-NEXT: scratch_load_b32 v54, off, s32 offset:104 -; GFX11TRUE16-NEXT: scratch_load_b32 v55, off, s32 offset:40 -; GFX11TRUE16-NEXT: scratch_load_b32 v64, off, s32 offset:100 -; GFX11TRUE16-NEXT: scratch_load_b32 v65, off, s32 offset:36 -; GFX11TRUE16-NEXT: scratch_load_b32 v66, off, s32 offset:96 -; GFX11TRUE16-NEXT: scratch_load_b32 v67, off, s32 offset:32 -; GFX11TRUE16-NEXT: scratch_load_b32 v68, off, s32 offset:92 -; GFX11TRUE16-NEXT: scratch_load_b32 v69, off, s32 offset:28 -; GFX11TRUE16-NEXT: scratch_load_b32 v70, off, s32 offset:88 -; GFX11TRUE16-NEXT: scratch_load_b32 v71, off, s32 offset:24 -; GFX11TRUE16-NEXT: scratch_load_b32 v80, off, s32 offset:84 -; GFX11TRUE16-NEXT: scratch_load_b32 v81, off, s32 offset:20 -; GFX11TRUE16-NEXT: scratch_load_b32 v82, off, s32 offset:76 +; GFX11TRUE16-NEXT: scratch_load_b32 v34, off, s32 offset:76 +; GFX11TRUE16-NEXT: scratch_load_b32 v35, off, s32 offset:124 +; GFX11TRUE16-NEXT: scratch_load_b32 v36, off, s32 offset:128 +; GFX11TRUE16-NEXT: scratch_load_b32 v37, off, s32 offset:64 +; GFX11TRUE16-NEXT: scratch_load_b32 v38, off, s32 offset:60 +; GFX11TRUE16-NEXT: scratch_load_b32 v39, off, s32 offset:120 +; GFX11TRUE16-NEXT: scratch_load_b32 v48, off, s32 offset:56 +; GFX11TRUE16-NEXT: scratch_load_b32 v49, off, s32 offset:116 +; GFX11TRUE16-NEXT: scratch_load_b32 v50, off, s32 offset:52 +; GFX11TRUE16-NEXT: scratch_load_b32 v51, off, s32 offset:112 +; GFX11TRUE16-NEXT: scratch_load_b32 v52, off, s32 offset:48 +; GFX11TRUE16-NEXT: scratch_load_b32 v53, off, s32 offset:108 +; GFX11TRUE16-NEXT: scratch_load_b32 v54, off, s32 offset:44 +; GFX11TRUE16-NEXT: scratch_load_b32 v55, off, s32 offset:104 +; GFX11TRUE16-NEXT: scratch_load_b32 v64, off, s32 offset:40 +; GFX11TRUE16-NEXT: scratch_load_b32 v65, off, s32 offset:100 +; GFX11TRUE16-NEXT: scratch_load_b32 v66, off, s32 offset:36 +; GFX11TRUE16-NEXT: scratch_load_b32 v67, off, s32 offset:96 +; 
GFX11TRUE16-NEXT: scratch_load_b32 v68, off, s32 offset:32 +; GFX11TRUE16-NEXT: scratch_load_b32 v69, off, s32 offset:92 +; GFX11TRUE16-NEXT: scratch_load_b32 v70, off, s32 offset:28 +; GFX11TRUE16-NEXT: scratch_load_b32 v71, off, s32 offset:88 +; GFX11TRUE16-NEXT: scratch_load_b32 v80, off, s32 offset:24 +; GFX11TRUE16-NEXT: scratch_load_b32 v81, off, s32 offset:84 +; GFX11TRUE16-NEXT: scratch_load_b32 v82, off, s32 offset:20 ; GFX11TRUE16-NEXT: scratch_load_b32 v83, off, s32 offset:80 ; GFX11TRUE16-NEXT: scratch_load_b32 v84, off, s32 offset:16 ; GFX11TRUE16-NEXT: scratch_load_b32 v85, off, s32 offset:12 @@ -45606,45 +45573,45 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s29, 1, v26 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(32) ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v31 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11TRUE16-NEXT: v_cndmask_b16 v15.l, v35.l, v36.l, s26 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(26) -; GFX11TRUE16-NEXT: v_cndmask_b16 v14.l, v34.l, v37.l, s27 -; GFX11TRUE16-NEXT: v_cndmask_b16 v14.h, v34.h, v37.h, s28 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(24) -; GFX11TRUE16-NEXT: v_cndmask_b16 v13.l, v38.l, v39.l, s29 -; GFX11TRUE16-NEXT: v_cndmask_b16 v13.h, v38.h, v39.h, s25 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(22) -; GFX11TRUE16-NEXT: v_cndmask_b16 v12.l, v48.l, v49.l, s24 -; GFX11TRUE16-NEXT: v_cndmask_b16 v12.h, v48.h, v49.h, s23 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11TRUE16-NEXT: v_cndmask_b16 v11.l, v50.l, v51.l, s22 -; GFX11TRUE16-NEXT: v_cndmask_b16 v11.h, v50.h, v51.h, s21 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11TRUE16-NEXT: v_cndmask_b16 v10.l, v52.l, v53.l, s20 -; GFX11TRUE16-NEXT: v_cndmask_b16 v10.h, v52.h, v53.h, s19 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11TRUE16-NEXT: v_cndmask_b16 v9.l, v54.l, v55.l, s18 -; GFX11TRUE16-NEXT: v_cndmask_b16 v9.h, v54.h, v55.h, s17 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11TRUE16-NEXT: v_cndmask_b16 
v8.l, v64.l, v65.l, s16 -; GFX11TRUE16-NEXT: v_cndmask_b16 v8.h, v64.h, v65.h, s15 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11TRUE16-NEXT: v_cndmask_b16 v7.l, v66.l, v67.l, s14 -; GFX11TRUE16-NEXT: v_cndmask_b16 v7.h, v66.h, v67.h, s13 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11TRUE16-NEXT: v_cndmask_b16 v6.l, v68.l, v69.l, s12 -; GFX11TRUE16-NEXT: v_cndmask_b16 v6.h, v68.h, v69.h, s11 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11TRUE16-NEXT: v_cndmask_b16 v5.l, v70.l, v71.l, s10 -; GFX11TRUE16-NEXT: v_cndmask_b16 v5.h, v70.h, v71.h, s9 -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v80.l, v81.l, s8 -; GFX11TRUE16-NEXT: v_cndmask_b16 v4.h, v80.h, v81.h, s7 +; GFX11TRUE16-NEXT: v_cndmask_b16 v15.l, v36.l, v37.l, s26 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(25) +; GFX11TRUE16-NEXT: v_cndmask_b16 v14.l, v35.l, v38.l, s27 +; GFX11TRUE16-NEXT: v_cndmask_b16 v14.h, v35.h, v38.h, s28 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(23) +; GFX11TRUE16-NEXT: v_cndmask_b16 v13.l, v39.l, v48.l, s29 +; GFX11TRUE16-NEXT: v_cndmask_b16 v13.h, v39.h, v48.h, s25 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(21) +; GFX11TRUE16-NEXT: v_cndmask_b16 v12.l, v49.l, v50.l, s24 +; GFX11TRUE16-NEXT: v_cndmask_b16 v12.h, v49.h, v50.h, s23 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(19) +; GFX11TRUE16-NEXT: v_cndmask_b16 v11.l, v51.l, v52.l, s22 +; GFX11TRUE16-NEXT: v_cndmask_b16 v11.h, v51.h, v52.h, s21 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(17) +; GFX11TRUE16-NEXT: v_cndmask_b16 v10.l, v53.l, v54.l, s20 +; GFX11TRUE16-NEXT: v_cndmask_b16 v10.h, v53.h, v54.h, s19 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(15) +; GFX11TRUE16-NEXT: v_cndmask_b16 v9.l, v55.l, v64.l, s18 +; GFX11TRUE16-NEXT: v_cndmask_b16 v9.h, v55.h, v64.h, s17 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(13) +; GFX11TRUE16-NEXT: v_cndmask_b16 v8.l, v65.l, v66.l, s16 +; GFX11TRUE16-NEXT: v_cndmask_b16 v8.h, v65.h, v66.h, s15 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(11) +; GFX11TRUE16-NEXT: v_cndmask_b16 v7.l, v67.l, v68.l, s14 +; 
GFX11TRUE16-NEXT: v_cndmask_b16 v7.h, v67.h, v68.h, s13 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(9) +; GFX11TRUE16-NEXT: v_cndmask_b16 v6.l, v69.l, v70.l, s12 +; GFX11TRUE16-NEXT: v_cndmask_b16 v6.h, v69.h, v70.h, s11 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(7) +; GFX11TRUE16-NEXT: v_cndmask_b16 v5.l, v71.l, v80.l, s10 +; GFX11TRUE16-NEXT: v_cndmask_b16 v5.h, v71.h, v80.h, s9 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(5) +; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v81.l, v82.l, s8 +; GFX11TRUE16-NEXT: v_cndmask_b16 v4.h, v81.h, v82.h, s7 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(3) ; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v83.l, v84.l, s6 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v82.l, v85.l, s4 +; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v34.l, v85.l, s4 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(1) ; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v33.l, v86.l, s2 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) @@ -45652,9 +45619,9 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v16 ; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v32.h, v87.h, vcc_lo ; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v33.h, v86.h, s1 -; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v82.h, v85.h, s3 +; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v34.h, v85.h, s3 ; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v83.h, v84.h, s5 -; GFX11TRUE16-NEXT: v_cndmask_b16 v15.h, v35.h, v36.h, s0 +; GFX11TRUE16-NEXT: v_cndmask_b16 v15.h, v36.h, v37.h, s0 ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11FAKE16-LABEL: v_vselect_v32bf16: diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx11.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx11.ll index dd389375b0d77..2b6d9cc349278 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx11.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx11.ll @@ -18,15 +18,15 @@ define amdgpu_kernel void @long_forward_branch_gfx11plus(ptr addrspace(1) %in, p ; GFX11-NEXT: 
s_setpc_b64 s[6:7] ; GFX11-NEXT: .LBB0_1: ; %bb2 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: global_load_d16_b16 v0, v1, s[0:1] -; GFX11-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 +; GFX11-NEXT: global_load_u16 v0, v2, s[0:1] +; GFX11-NEXT: global_load_u16 v1, v2, s[0:1] offset:2 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: global_store_b16 v1, v0, s[2:3] +; GFX11-NEXT: global_store_b16 v2, v0, s[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_d16_hi_b16 v1, v0, s[2:3] offset:2 +; GFX11-NEXT: global_store_b16 v2, v1, s[2:3] offset:2 ; GFX11-NEXT: .LBB0_2: ; %bb3 ; GFX11-NEXT: s_endpgm bb0: diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index b8dd377377dab..0eab82778c8db 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -5091,7 +5091,7 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 8 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3] ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, off, off offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u8 v0, off, off offset:8 ; GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, off offset:12 ; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll index d374ed072cdc6..2e1f3cf638aa9 100644 --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -48,25 +48,15 @@ define <2 x half> @chain_hi_to_lo_private() { ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) ; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: chain_hi_to_lo_private: -; GFX11-TRUE16: ; 
%bb.0: ; %bb -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s0, 2 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s0 -; GFX11-TRUE16-NEXT: s_mov_b32 s0, 0 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, off, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: chain_hi_to_lo_private: -; GFX11-FAKE16: ; %bb.0: ; %bb -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s0, 2 -; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, s0 -; GFX11-FAKE16-NEXT: s_mov_b32 s0, 0 -; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v0, off, s0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: chain_hi_to_lo_private: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, 2 +; GFX11-NEXT: scratch_load_u16 v0, off, s0 +; GFX11-NEXT: s_mov_b32 s0, 0 +; GFX11-NEXT: scratch_load_d16_hi_b16 v0, off, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %gep_lo = getelementptr inbounds half, ptr addrspace(5) null, i64 1 %load_lo = load half, ptr addrspace(5) %gep_lo @@ -114,21 +104,13 @@ define <2 x half> @chain_hi_to_lo_private_different_bases(ptr addrspace(5) %base ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) ; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: chain_hi_to_lo_private_different_bases: -; GFX11-TRUE16: ; %bb.0: ; %bb -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, v0, off -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, v1, off -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: chain_hi_to_lo_private_different_bases: -; GFX11-FAKE16: ; %bb.0: ; %bb -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: scratch_load_u16 v0, v0, off -; 
GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v0, v1, off -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: chain_hi_to_lo_private_different_bases: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u16 v0, v0, off +; GFX11-NEXT: scratch_load_d16_hi_b16 v0, v1, off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %load_lo = load half, ptr addrspace(5) %base_lo %load_hi = load half, ptr addrspace(5) %base_hi @@ -325,29 +307,17 @@ define <2 x half> @chain_hi_to_lo_global() { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: chain_hi_to_lo_global: -; GFX11-TRUE16: ; %bb.0: ; %bb -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[1:2], off -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: chain_hi_to_lo_global: -; GFX11-FAKE16: ; %bb.0: ; %bb -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 2 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-FAKE16-NEXT: global_load_d16_hi_b16 v0, v[1:2], off -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: chain_hi_to_lo_global: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 2 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: v_mov_b32_e32 
v2, 0 +; GFX11-NEXT: global_load_d16_hi_b16 v0, v[1:2], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %gep_lo = getelementptr inbounds half, ptr addrspace(1) null, i64 1 %load_lo = load half, ptr addrspace(1) %gep_lo @@ -377,21 +347,13 @@ define <2 x half> @chain_hi_to_lo_global_different_bases(ptr addrspace(1) %base_ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: chain_hi_to_lo_global_different_bases: -; GFX11-TRUE16: ; %bb.0: ; %bb -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[2:3], off -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: chain_hi_to_lo_global_different_bases: -; GFX11-FAKE16: ; %bb.0: ; %bb -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-FAKE16-NEXT: global_load_d16_hi_b16 v0, v[2:3], off -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: chain_hi_to_lo_global_different_bases: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: global_load_d16_hi_b16 v0, v[2:3], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %load_lo = load half, ptr addrspace(1) %base_lo %load_hi = load half, ptr addrspace(1) %base_hi @@ -459,29 +421,17 @@ define <2 x half> @chain_hi_to_lo_flat(ptr inreg %ptr) { ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat: -; GFX11-TRUE16: ; %bb.0: ; %bb -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] offset:2 -; 
GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat: -; GFX11-FAKE16: ; %bb.0: ; %bb -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] offset:2 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: chain_hi_to_lo_flat: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: flat_load_u16 v0, v[0:1] offset:2 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[1:2] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %gep_lo = getelementptr inbounds half, ptr %ptr, i64 1 %load_lo = load half, ptr %gep_lo @@ -512,23 +462,14 @@ define <2 x half> @chain_hi_to_lo_flat_different_bases(ptr %base_lo, ptr %base_h ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_different_bases: -; GFX11-TRUE16: ; %bb.0: ; %bb -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; 
GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_different_bases: -; GFX11-FAKE16: ; %bb.0: ; %bb -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: chain_hi_to_lo_flat_different_bases: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_load_u16 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %load_lo = load half, ptr %base_lo %load_hi = load half, ptr %base_hi @@ -677,23 +618,25 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off dlc ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[0:1] offset:2 +; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:2 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off offset:2 dlc ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[0:1] offset:4 +; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:4 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off offset:4 dlc ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, off, off offset:2 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, off +; GFX11-TRUE16-NEXT: scratch_load_u16 v3, off, off offset:2 +; GFX11-TRUE16-NEXT: scratch_load_u16 v0, off, off +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v3 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l ; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[2:3] @@ -963,7 +906,7 @@ define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) { ; GFX11-TRUE16-LABEL: chain_hi_to_lo_global_other_dep: ; GFX11-TRUE16: ; %bb.0: ; %bb ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v2, v[0:1], off offset:2 glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v2, v[0:1], off offset:2 glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) @@ -1037,7 +980,7 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) { ; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_other_dep: ; GFX11-TRUE16: ; %bb.0: ; %bb ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v2, v[0:1] offset:2 glc dlc +; GFX11-TRUE16-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll index ccdc0b1bf43c4..5d74fe3d3c470 100644 --- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll +++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll @@ -490,7 +490,7 @@ define amdgpu_kernel void @v_clamp_add_src_f16_denorm(ptr addrspace(1) %out, ptr ; 
GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, v0.l, 1.0 clamp ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -573,7 +573,7 @@ define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals(ptr addrspace(1) %ou ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, v0.l, 1.0 clamp ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -1555,18 +1555,18 @@ define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f16_src(ptr addrspace(1) %ou ; GFX11-TRUE16-LABEL: v_no_clamp_add_src_v2f16_f16_src: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0 +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0 +; GFX11-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 clamp -; 
GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: v_no_clamp_add_src_v2f16_f16_src: @@ -1969,7 +1969,7 @@ define amdgpu_kernel void @v_clamp_add_src_f16_denorm_minimumnum_maximumnum(ptr ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, v0.l, 1.0 clamp ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -2052,7 +2052,7 @@ define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals_minimumnum_maximumnu ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, v0.l, 1.0 clamp ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll index 5eb6b2f58474d..04f2a5ed1a5fd 100644 --- a/llvm/test/CodeGen/AMDGPU/clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/clamp.ll @@ -594,7 +594,7 @@ define amdgpu_kernel void @v_clamp_f16(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, v0.l, v0.l clamp ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -702,7 +702,7 @@ define 
amdgpu_kernel void @v_clamp_neg_f16(ptr addrspace(1) %out, ptr addrspace( ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l clamp ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -811,7 +811,7 @@ define amdgpu_kernel void @v_clamp_negabs_f16(ptr addrspace(1) %out, ptr addrspa ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -|v0.l|, -|v0.l| clamp ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index 5fb50d0d89530..d896082e858c0 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -3,6 +3,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s ; RUN: llc -mtriple=amdgcn -mcpu=kaveri < %s | FileCheck -enable-var-scope -check-prefixes=CI %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-D16-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s declare half @llvm.fabs.f16(half) #0 @@ -57,6 +58,15 @@ define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(ptr 
addrspace( ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_undef_value_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_undef_value_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -111,12 +121,23 @@ define amdgpu_kernel void @v_test_canonicalize_var_f16(ptr addrspace(1) %out) #1 ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v[0:1], v0, off ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_var_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_load_u16 v0, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-D16-TRUE16-NEXT: global_store_b16 v[0:1], v0, off +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_var_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -179,6 +200,17 @@ define amdgpu_kernel void @s_test_canonicalize_var_f16(ptr addrspace(1) %out, i1 ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: 
s_test_canonicalize_var_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_clause 0x1 +; GFX11-D16-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e64 v0.l, s2, s2 +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: s_test_canonicalize_var_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_clause 0x1 @@ -229,6 +261,14 @@ define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1 ; GFX11-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_build_vector_v2f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11-D16-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-D16-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-D16-TRUE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_build_vector_v2f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -284,12 +324,23 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(ptr addrspace(1) %ou ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_fabs_var_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 
+; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e64 v0.l, |v0.l|, |v0.l| +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_fabs_var_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -349,12 +400,23 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(ptr addrspace(1 ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -|v0.l|, -|v0.l| ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_fneg_fabs_var_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e64 v0.l, -|v0.l|, -|v0.l| +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_fneg_fabs_var_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -415,12 +477,23 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(ptr addrspace(1) %ou ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt 
vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_fneg_var_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_fneg_var_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -480,12 +553,23 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(ptr add ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: v_test_no_denormals_canonicalize_fneg_var_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e64 v0.l, -v0.l, -v0.l +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: v_test_no_denormals_canonicalize_fneg_var_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -545,12 +629,23 @@ define 
amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(pt ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e64 v0.l, -|v0.l|, -|v0.l| ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: v_test_no_denormals_canonicalize_fneg_fabs_var_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e64 v0.l, -|v0.l|, -|v0.l| +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: v_test_no_denormals_canonicalize_fneg_fabs_var_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -607,6 +702,15 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f16(ptr addrspace(1) %out) ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_p0_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_p0_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -658,6 +762,15 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f16(ptr addrspace(1) %out) ; GFX11-TRUE16-NEXT: 
global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_n0_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_n0_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -709,6 +822,15 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f16(ptr addrspace(1) %out) ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_p1_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_p1_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -760,6 +882,15 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f16(ptr addrspace(1) %out) ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_n1_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xbc00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_n1_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ 
-811,6 +942,15 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f16(ptr addrspace(1) % ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_literal_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4c00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_literal_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -862,6 +1002,15 @@ define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f1 ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_default_denormals_fold_canonicalize_denormal0_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3ff +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_default_denormals_fold_canonicalize_denormal0_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -913,6 +1062,15 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(ptr ad ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_denormals_fold_canonicalize_denormal0_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3ff +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: 
global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_denormals_fold_canonicalize_denormal0_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -964,6 +1122,15 @@ define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f1 ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_default_denormals_fold_canonicalize_denormal1_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x83ff +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_default_denormals_fold_canonicalize_denormal1_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -1015,6 +1182,15 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(ptr ad ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_denormals_fold_canonicalize_denormal1_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x83ff +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_denormals_fold_canonicalize_denormal1_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -1066,6 +1242,15 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(ptr addrspace(1) %out ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_qnan_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; 
GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7c00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_qnan_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -1117,6 +1302,15 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(ptr addrsp ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_qnan_value_neg1_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7e00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_qnan_value_neg1_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -1168,6 +1362,15 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(ptr addrsp ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_qnan_value_neg2_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7e00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_qnan_value_neg2_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -1219,6 +1422,15 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(ptr 
addrspace( ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_snan0_value_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7e00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_snan0_value_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -1270,6 +1482,15 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(ptr addrspace( ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_snan1_value_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7e00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_snan1_value_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -1321,6 +1542,15 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(ptr addrspace( ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_snan2_value_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7e00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: 
test_fold_canonicalize_snan2_value_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -1372,6 +1602,15 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(ptr addrspace( ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; +; GFX11-D16-TRUE16-LABEL: test_fold_canonicalize_snan3_value_f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7e00 +; GFX11-D16-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-D16-TRUE16-NEXT: s_endpgm +; ; GFX11-FAKE16-LABEL: test_fold_canonicalize_snan3_value_f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -2572,6 +2811,14 @@ define <2 x half> @v_test_canonicalize_reg_undef_v2f16(half %val) #1 { ; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, 0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_reg_undef_v2f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-D16-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-D16-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, 0 +; GFX11-D16-TRUE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_reg_undef_v2f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2611,6 +2858,12 @@ define <2 x half> @v_test_canonicalize_undef_reg_v2f16(half %val) #1 { ; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v0.l, v0.l ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_undef_reg_v2f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e32 v0.h, v0.l, v0.l +; GFX11-D16-TRUE16-NEXT: s_setpc_b64 
s[30:31] +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_undef_reg_v2f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2774,6 +3027,14 @@ define <2 x half> @v_test_canonicalize_reg_k_v2f16(half %val) #1 { ; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, 2.0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_reg_k_v2f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-D16-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-D16-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, 2.0 +; GFX11-D16-TRUE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_reg_k_v2f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2818,6 +3079,14 @@ define <2 x half> @v_test_canonicalize_k_reg_v2f16(half %val) #1 { ; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, 2.0, v0.l ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_k_reg_v2f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-D16-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-D16-TRUE16-NEXT: v_pack_b32_f16 v0, 2.0, v0.l +; GFX11-D16-TRUE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_k_reg_v2f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2913,6 +3182,15 @@ define <4 x half> @v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) #1 ; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, 0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_reg_undef_undef_undef_v4f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; 
GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0x7e007e00 +; GFX11-D16-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-D16-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, 0 +; GFX11-D16-TRUE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_reg_undef_undef_undef_v4f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2965,6 +3243,15 @@ define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal ; GFX11-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_reg_reg_undef_undef_v4f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11-D16-TRUE16-NEXT: v_mov_b32_e32 v1, 0x7e007e00 +; GFX11-D16-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-D16-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX11-D16-TRUE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_reg_reg_undef_undef_v4f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3022,6 +3309,16 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half ; GFX11-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +; GFX11-D16-TRUE16-LABEL: v_test_canonicalize_reg_undef_reg_reg_v4f16: +; GFX11-D16-TRUE16: ; %bb.0: +; GFX11-D16-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-D16-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l +; GFX11-D16-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l +; GFX11-D16-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-D16-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, 0 +; GFX11-D16-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX11-D16-TRUE16-NEXT: s_setpc_b64 s[30:31] +; ; GFX11-FAKE16-LABEL: v_test_canonicalize_reg_undef_reg_reg_v4f16: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll index d32b528d13276..f9a3a55fe69fd 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -2410,7 +2410,7 @@ define amdgpu_kernel void @test_canonicalize_value_f16_flush(ptr addrspace(1) %a ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[2:3] @@ -2792,7 +2792,7 @@ define amdgpu_kernel void @test_canonicalize_value_f16_denorm(ptr addrspace(1) % ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll index 210e09fd9169a..784363035e7de 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll @@ -137,33 +137,33 @@ define amdgpu_kernel void @v_fdiv_f16( ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: 
s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[4:5] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v3, v1, s[4:5] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v0.l -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v1.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l +; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v2.l +; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v3.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v3.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v2.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v3, v3 +; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v0, v0 ; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v3 +; GFX11-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v0 ; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v5, v4, v6 op_sel_hi:[1,0,1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v4, v7, v3 +; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v4, v7, v0 ; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v4, v6 op_sel_hi:[1,0,1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v5, v3 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff800000, v3 +; GFX11-TRUE16-NEXT: v_mul_f32_e32 v0, v5, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff800000, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v3, v3, v4 -; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, v0, v4 +; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.h, v1.l, v0.l -; GFX11-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_div_fixup_f16 
v0.l, v0.l, v3.l, v2.l +; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: v_fdiv_f16: @@ -293,7 +293,7 @@ define amdgpu_kernel void @v_rcp_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) # ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -400,7 +400,7 @@ define amdgpu_kernel void @v_rcp_f16_abs(ptr addrspace(1) %r, ptr addrspace(1) % ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_rcp_f16_e64 v0.l, |v0.l| ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -510,7 +510,7 @@ define amdgpu_kernel void @reciprocal_f16_rounded(ptr addrspace(1) %r, ptr addrs ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -604,7 +604,7 @@ define amdgpu_kernel void @v_rcp_f16_afn(ptr addrspace(1) %r, ptr addrspace(1) % ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; 
GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -711,7 +711,7 @@ define amdgpu_kernel void @v_rcp_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) % ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_rcp_f16_e64 v0.l, -v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -821,7 +821,7 @@ define amdgpu_kernel void @v_rsq_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) # ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -935,7 +935,7 @@ define amdgpu_kernel void @v_rsq_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) % ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff @@ -1058,12 +1058,12 @@ define amdgpu_kernel void @v_rsq_f16_multi_use(ptr addrspace(1) %r, ptr addrspac ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: 
global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_rsq_f16_e32 v0.h, v0.l -; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc +; GFX11-TRUE16-NEXT: v_rsq_f16_e32 v0.l, v2.l +; GFX11-TRUE16-NEXT: global_store_b16 v1, v2, s[0:1] dlc ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: v_rsq_f16_multi_use: @@ -1177,7 +1177,7 @@ define amdgpu_kernel void @v_rsq_f16_missing_contract0(ptr addrspace(1) %r, ptr ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff @@ -1295,7 +1295,7 @@ define amdgpu_kernel void @v_rsq_f16_missing_contract1(ptr addrspace(1) %r, ptr ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff @@ -1413,7 +1413,7 @@ define amdgpu_kernel void @v_neg_rsq_f16_missing_contract1(ptr addrspace(1) %r, ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt 
vmcnt(0) ; GFX11-TRUE16-NEXT: v_sqrt_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff @@ -1536,13 +1536,13 @@ define amdgpu_kernel void @v_fdiv_f16_afn(ptr addrspace(1) %r, ptr addrspace(1) ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[4:5] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h +; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v2.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; @@ -1666,13 +1666,13 @@ define amdgpu_kernel void @v_fdiv_f16_unsafe(ptr addrspace(1) %r, ptr addrspace( ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[4:5] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.h, v0.h +; GFX11-TRUE16-NEXT: v_rcp_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v2.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; @@ -1758,7 +1758,7 @@ define amdgpu_kernel void @div_afn_2_x_pat_f16(ptr 
addrspace(1) %out) #0 { ; ; GFX11-TRUE16-LABEL: div_afn_2_x_pat_f16: ; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off +; GFX11-TRUE16-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) @@ -1834,7 +1834,7 @@ define amdgpu_kernel void @div_afn_k_x_pat_f16(ptr addrspace(1) %out) #0 { ; ; GFX11-TRUE16-LABEL: div_afn_k_x_pat_f16: ; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off +; GFX11-TRUE16-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) @@ -1910,7 +1910,7 @@ define amdgpu_kernel void @div_afn_neg_k_x_pat_f16(ptr addrspace(1) %out) #0 { ; ; GFX11-TRUE16-LABEL: div_afn_neg_k_x_pat_f16: ; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off +; GFX11-TRUE16-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll index 91f9aa1c5fe3b..14e58a2637390 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll @@ -225,7 +225,7 @@ define amdgpu_kernel void @store_flat_i8_neg_offset(ptr %fptr, i8 %x) #0 { ; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc{{$}} ; GFX10: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} -; GFX11-TRUE16: flat_load_d16_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc dlc{{$}} +; GFX11-TRUE16: flat_load_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc dlc{{$}} ; GFX11-FAKE16: flat_load_u8 v{{[0-9]+}}, 
v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc dlc{{$}} define amdgpu_kernel void @load_flat_i8_max_offset(ptr %fptr) #0 { %fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4095 @@ -237,7 +237,7 @@ define amdgpu_kernel void @load_flat_i8_max_offset(ptr %fptr) #0 { ; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GFX10: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} -; GFX11-TRUE16: flat_load_d16_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} +; GFX11-TRUE16: flat_load_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} ; GFX11-FAKE16: flat_load_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} define amdgpu_kernel void @load_flat_i8_max_offset_p1(ptr %fptr) #0 { %fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4096 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index fc8883924dfbc..1f8cee42686bd 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -4585,7 +4585,7 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg) ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, 1 ; GFX11-TRUE16-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, v0, off offset:-1 glc dlc +; GFX11-TRUE16-NEXT: scratch_load_u8 v0, v0, off offset:-1 glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -4678,7 +4678,7 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg) ; GFX11-PAL-TRUE16-NEXT: v_mov_b16_e32 v1.l, 1 ; GFX11-PAL-TRUE16-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc ; GFX11-PAL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-PAL-TRUE16-NEXT: scratch_load_d16_u8 v0, v0, off offset:-1 glc dlc +; GFX11-PAL-TRUE16-NEXT: scratch_load_u8 v0, v0, off offset:-1 glc dlc ; 
GFX11-PAL-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -4758,7 +4758,7 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 1 ; GFX11-TRUE16-NEXT: scratch_store_b8 v1, v0, off offset:-129 dlc ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, v1, off offset:-129 glc dlc +; GFX11-TRUE16-NEXT: scratch_load_u8 v0, v1, off offset:-129 glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -4853,7 +4853,7 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture ; GFX11-PAL-TRUE16-NEXT: v_mov_b16_e32 v0.l, 1 ; GFX11-PAL-TRUE16-NEXT: scratch_store_b8 v1, v0, off offset:-129 dlc ; GFX11-PAL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-PAL-TRUE16-NEXT: scratch_load_d16_u8 v0, v1, off offset:-129 glc dlc +; GFX11-PAL-TRUE16-NEXT: scratch_load_u8 v0, v1, off offset:-129 glc dlc ; GFX11-PAL-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll index 57be2907da4a0..d20fa41837e3a 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll @@ -7950,7 +7950,7 @@ define amdgpu_kernel void @atomic_load_i8_offset(ptr %in, ptr %out) { ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s3 -; GFX11-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:16 glc +; GFX11-TRUE16-NEXT: flat_load_u8 v0, v[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -8026,7 +8026,7 @@ define amdgpu_kernel void @atomic_load_i8(ptr %in, ptr %out) { ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: 
v_dual_mov_b32 v1, s1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s3 -; GFX11-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc +; GFX11-TRUE16-NEXT: flat_load_u8 v0, v[0:1] glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -8119,7 +8119,7 @@ define amdgpu_kernel void @atomic_load_i8_addr64_offset(ptr %in, ptr %out, i64 % ; GFX11-TRUE16-NEXT: s_addc_u32 s1, s1, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:16 glc +; GFX11-TRUE16-NEXT: flat_load_u8 v0, v[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -8407,7 +8407,7 @@ define amdgpu_kernel void @atomic_load_i16_offset(ptr %in, ptr %out) { ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s3 -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] offset:16 glc +; GFX11-TRUE16-NEXT: flat_load_u16 v0, v[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -8483,7 +8483,7 @@ define amdgpu_kernel void @atomic_load_i16(ptr %in, ptr %out) { ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s3 -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] glc +; GFX11-TRUE16-NEXT: flat_load_u16 v0, v[0:1] glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -8580,7 +8580,7 @@ define amdgpu_kernel void @atomic_load_i16_addr64_offset(ptr %in, ptr %out, i64 ; GFX11-TRUE16-NEXT: s_addc_u32 s1, s1, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: 
v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] offset:16 glc +; GFX11-TRUE16-NEXT: flat_load_u16 v0, v[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -10599,7 +10599,7 @@ define amdgpu_kernel void @atomic_load_f16_offset(ptr %in, ptr %out) { ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s3 -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] offset:16 glc +; GFX11-TRUE16-NEXT: flat_load_u16 v0, v[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -10674,7 +10674,7 @@ define amdgpu_kernel void @atomic_load_f16(ptr %in, ptr %out) { ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s3 -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] glc +; GFX11-TRUE16-NEXT: flat_load_u16 v0, v[0:1] glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -10752,7 +10752,7 @@ define amdgpu_kernel void @atomic_load_bf16_offset(ptr %in, ptr %out) { ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s3 -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] offset:16 glc +; GFX11-TRUE16-NEXT: flat_load_u16 v0, v[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -10827,7 +10827,7 @@ define amdgpu_kernel void @atomic_load_bf16(ptr %in, ptr %out) { ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s3 
-; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] glc +; GFX11-TRUE16-NEXT: flat_load_u16 v0, v[0:1] glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.gfx11plus.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.gfx11plus.ll index 5f86f2e48137d..de2e01f1cdf04 100644 --- a/llvm/test/CodeGen/AMDGPU/fma.f16.gfx11plus.ll +++ b/llvm/test/CodeGen/AMDGPU/fma.f16.gfx11plus.ll @@ -29,13 +29,14 @@ define amdgpu_kernel void @fma_v2f16_divergent( ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]](s32), killed [[S_MOV_B32_]], implicit $exec ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 killed [[S_MOV_B32_1]], killed [[V_AND_B32_e64_]], implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 killed [[REG_SEQUENCE1]], killed [[V_LSHLREV_B32_e64_]], 0, 0, implicit $exec :: (load (s16) from %ir.f.gep, addrspace 1) - ; GFX11-NEXT: [[V_AND_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_AND_B16_t16_e64 0, 32767, 0, [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], 0, implicit $exec - ; GFX11-NEXT: [[V_XOR_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_XOR_B16_t16_e64 0, -32768, 0, [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], 0, implicit $exec + ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR killed [[REG_SEQUENCE1]], killed [[V_LSHLREV_B32_e64_]], 0, 0, implicit $exec :: (load (s16) from %ir.f.gep, addrspace 1) + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_16 = COPY [[GLOBAL_LOAD_USHORT_SADDR]].lo16 + ; GFX11-NEXT: [[V_AND_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_AND_B16_t16_e64 0, 32767, 0, [[COPY10]], 0, implicit $exec + ; GFX11-NEXT: [[V_XOR_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_XOR_B16_t16_e64 0, -32768, 0, [[COPY10]], 0, implicit $exec ; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX11-NEXT: 
S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_2]], implicit-def $scc - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc - ; GFX11-NEXT: [[V_CNDMASK_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CNDMASK_B16_t16_e64 0, killed [[V_XOR_B16_t16_e64_]], 0, killed [[V_AND_B16_t16_e64_]], killed [[COPY10]], 0, implicit $exec + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc + ; GFX11-NEXT: [[V_CNDMASK_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CNDMASK_B16_t16_e64 0, killed [[V_XOR_B16_t16_e64_]], 0, killed [[V_AND_B16_t16_e64_]], killed [[COPY11]], 0, implicit $exec ; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX11-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vgpr_32 = REG_SEQUENCE killed [[V_CNDMASK_B16_t16_e64_]], %subreg.lo16, killed [[DEF]], %subreg.hi16 @@ -88,8 +89,8 @@ define amdgpu_kernel void @fma_v2f16_uniform( ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX8_IMM]].sub7 ; GFX11-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX8_IMM]].sub6 ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY8]], %subreg.sub0, killed [[COPY7]], %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 killed [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s16) from %ir.3, addrspace 1) - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]] + ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR killed [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s16) from %ir.3, addrspace 1) + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[GLOBAL_LOAD_USHORT_SADDR]].lo16 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[COPY9]] ; GFX11-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed [[REG_SEQUENCE2]], 0, 0 :: ("amdgpu-noclobber" 
load (s32) from %ir.4, addrspace 1) ; GFX11-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed [[REG_SEQUENCE3]], 0, 0 :: ("amdgpu-noclobber" load (s32) from %ir.5, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum.ll b/llvm/test/CodeGen/AMDGPU/fmaximum.ll index e59fbada6793d..066230e683c3e 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-FAKE16 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-TRUE16 %s +; FIXME-TRUE16 enable gisel +; XUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-TRUE16 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-FAKE16 %s define amdgpu_ps float @test_fmaximum_f32_vv(float %a, float %b) { @@ -122,15 +123,14 @@ define amdgpu_ps half @test_fmaximum_f16_vv(half %a, half %b) { ; GFX12-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: test_fmaximum_f16_vv: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: v_maximum_f16 v0, v0, v1 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: test_fmaximum_f16_vv: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: 
test_fmaximum_f16_vv: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %val = call half @llvm.maximum.f16(half %a, half %b) ret half %val } @@ -171,17 +171,16 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_vv(<3 x half> %a, <3 x half> %b ; GFX12-SDAG-NEXT: v_pk_maximum_f16 v1, v1, v3 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: test_fmaximum_v3f16_vv: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: v_pk_maximum_f16 v0, v0, v2 +; GFX12-GISEL-NEXT: v_maximum_f16 v1, v1, v3 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: test_fmaximum_v3f16_vv: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2 ; GFX12-GISEL-TRUE16-NEXT: v_maximum_f16 v1.l, v1.l, v3.l ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: test_fmaximum_v3f16_vv: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2 -; GFX12-GISEL-FAKE16-NEXT: v_maximum_f16 v1, v1, v3 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %val = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b) ret <3 x half> %val } @@ -335,6 +334,20 @@ define amdgpu_kernel void @fmaximum_f16_move_to_valu(ptr addrspace(1) %out, ptr ; GFX12-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] ; GFX12-SDAG-FAKE16-NEXT: s_endpgm ; +; GFX12-GISEL-LABEL: fmaximum_f16_move_to_valu: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_clause 0x1 +; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] scope:SCOPE_SYS +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: global_load_u16 v2, v0, s[4:5] scope:SCOPE_SYS +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: 
v_maximum_f16 v1, v1, v2 +; GFX12-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX12-GISEL-NEXT: s_endpgm ; GFX12-GISEL-TRUE16-LABEL: fmaximum_f16_move_to_valu: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: s_clause 0x1 @@ -349,21 +362,6 @@ define amdgpu_kernel void @fmaximum_f16_move_to_valu(ptr addrspace(1) %out, ptr ; GFX12-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h ; GFX12-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX12-GISEL-TRUE16-NEXT: s_endpgm -; -; GFX12-GISEL-FAKE16-LABEL: fmaximum_f16_move_to_valu: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: s_clause 0x1 -; GFX12-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX12-GISEL-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX12-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX12-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] scope:SCOPE_SYS -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] scope:SCOPE_SYS -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: v_maximum_f16 v1, v1, v2 -; GFX12-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] -; GFX12-GISEL-FAKE16-NEXT: s_endpgm %a = load volatile half, ptr addrspace(1) %aptr, align 4 %b = load volatile half, ptr addrspace(1) %bptr, align 4 %v = call half @llvm.maximum.f16(half %a, half %b) @@ -383,3 +381,5 @@ declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>) declare double @llvm.maximum.f64(double, double) declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GFX12-GISEL-FAKE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll index 9233f8059a202..76f6693199281 100644 --- a/llvm/test/CodeGen/AMDGPU/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll @@ -8,7 +8,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s +; FIXME-TRUE16 enable gisel +; XUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32: @@ -7547,19 +7548,19 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %o ; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] ; GFX11-SDAG-FAKE16-NEXT: s_endpgm ; -; GFX11-GISEL-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: -; GFX11-GISEL-FAKE16: ; %bb.0: -; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] -; 
GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 -; GFX11-GISEL-FAKE16-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 -; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] -; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; GFX11-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: v_add_f16_e32 v1, 1.0, v1 +; GFX11-GISEL-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 +; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: ; GFX11-SDAG-TRUE16: ; %bb.0: @@ -7568,13 +7569,12 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %o ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, 2.0, 4.0 ; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; ; GFX11-GISEL-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: ; GFX11-GISEL-TRUE16: ; %bb.0: ; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -7788,26 +7788,26 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, pt ; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] ; 
GFX11-SDAG-FAKE16-NEXT: s_endpgm ; -; GFX11-GISEL-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0: -; GFX11-GISEL-FAKE16: ; %bb.0: -; GFX11-GISEL-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 -; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v2, 2.0, v2 -; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v3, 4.0, v3 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-FAKE16-NEXT: v_med3_f16 v1, v1, v2, v3 -; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] -; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; GFX11-GISEL-LABEL: v_nnan_inputs_med3_f16_pat0: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: v_add_f16_e32 v1, 1.0, v1 +; GFX11-GISEL-NEXT: v_add_f16_e32 v2, 2.0, v2 +; GFX11-GISEL-NEXT: v_add_f16_e32 v3, 4.0, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_f16 v1, v1, v2, v3 +; 
GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0: ; GFX11-SDAG-TRUE16: ; %bb.0: @@ -7816,20 +7816,19 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, pt ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v2, s[2:3] glc dlc ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] glc dlc +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v1, v2, s[4:5] glc dlc ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] glc dlc +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v3, v2, s[6:7] glc dlc ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h -; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v1.l, 4.0, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.h, 2.0, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v1.l, 4.0, v3.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, v0.h, v1.l ; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] ; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; ; GFX11-GISEL-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0: ; GFX11-GISEL-TRUE16: ; %bb.0: ; GFX11-GISEL-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -8752,13 +8751,13 @@ define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) #1 { ; GFX11-SDAG-FAKE16-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-FAKE16-LABEL: v_test_fmed3_r_i_i_f16_minimumnum_maximumnum: -; GFX11-GISEL-FAKE16: ; %bb.0: -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
-; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-FAKE16-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 -; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-GISEL-LABEL: v_test_fmed3_r_i_i_f16_minimumnum_maximumnum: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_f16 v0, v0, 2.0, 4.0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-TRUE16-LABEL: v_test_fmed3_r_i_i_f16_minimumnum_maximumnum: ; GFX11-SDAG-TRUE16: ; %bb.0: @@ -8767,7 +8766,6 @@ define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) #1 { ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, 2.0, 4.0 ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX11-GISEL-TRUE16-LABEL: v_test_fmed3_r_i_i_f16_minimumnum_maximumnum: ; GFX11-GISEL-TRUE16: ; %bb.0: ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -8917,3 +8915,5 @@ declare half @llvm.maxnum.f16(half, half) #0 attributes #0 = { nounwind readnone } attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" } attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GFX11-GISEL-FAKE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fminimum.ll b/llvm/test/CodeGen/AMDGPU/fminimum.ll index b25120f2ece6f..e6936e4bdff6c 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-FAKE16 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-TRUE16 %s +; FIXME-TRUE16 enable gisel +; XUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-TRUE16 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-FAKE16 %s define amdgpu_ps float @test_fminimum_f32_vv(float %a, float %b) { @@ -122,15 +123,14 @@ define amdgpu_ps half @test_fminimum_f16_vv(half %a, half %b) { ; GFX12-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: test_fminimum_f16_vv: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: v_minimum_f16 v0, v0, v1 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: test_fminimum_f16_vv: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: test_fminimum_f16_vv: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 -; GFX12-GISEL-FAKE16-NEXT: ; return to 
shader part epilog %val = call half @llvm.minimum.f16(half %a, half %b) ret half %val } @@ -171,17 +171,16 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b ; GFX12-SDAG-NEXT: v_pk_minimum_f16 v1, v1, v3 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: test_fminimum_v3f16_vv: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: v_pk_minimum_f16 v0, v0, v2 +; GFX12-GISEL-NEXT: v_minimum_f16 v1, v1, v3 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: test_fminimum_v3f16_vv: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2 ; GFX12-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v1.l, v3.l ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: test_fminimum_v3f16_vv: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2 -; GFX12-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v3 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %val = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b) ret <3 x half> %val } @@ -335,6 +334,20 @@ define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr ; GFX12-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] ; GFX12-SDAG-FAKE16-NEXT: s_endpgm ; +; GFX12-GISEL-LABEL: fminimum_f16_move_to_valu: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_clause 0x1 +; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] scope:SCOPE_SYS +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: global_load_u16 v2, v0, s[4:5] scope:SCOPE_SYS +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: v_minimum_f16 v1, v1, v2 +; GFX12-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX12-GISEL-NEXT: s_endpgm ; GFX12-GISEL-TRUE16-LABEL: 
fminimum_f16_move_to_valu: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: s_clause 0x1 @@ -349,21 +362,6 @@ define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr ; GFX12-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h ; GFX12-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX12-GISEL-TRUE16-NEXT: s_endpgm -; -; GFX12-GISEL-FAKE16-LABEL: fminimum_f16_move_to_valu: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: s_clause 0x1 -; GFX12-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX12-GISEL-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX12-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX12-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] scope:SCOPE_SYS -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] scope:SCOPE_SYS -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v2 -; GFX12-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] -; GFX12-GISEL-FAKE16-NEXT: s_endpgm %a = load volatile half, ptr addrspace(1) %aptr, align 4 %b = load volatile half, ptr addrspace(1) %bptr, align 4 %v = call half @llvm.minimum.f16(half %a, half %b) @@ -383,3 +381,5 @@ declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>) declare double @llvm.minimum.f64(double, double) declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>) declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GFX12-GISEL-FAKE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll index 51b6d17312ed7..d258329128994 100644 --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -97,18 +97,18 @@ define amdgpu_kernel void @fmuladd_f16(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-FLUSH-TRUE16-LABEL: fmuladd_f16: ; GFX11-FLUSH-TRUE16: ; %bb.0: ; GFX11-FLUSH-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: s_clause 0x2 -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[4:5] +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v3, v1, s[6:7] ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v3.l +; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: fmuladd_f16: @@ -131,15 +131,15 @@ define amdgpu_kernel void @fmuladd_f16(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-DENORM-STRICT-TRUE16-LABEL: fmuladd_f16: ; GFX11-DENORM-STRICT-TRUE16: ; %bb.0: ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; 
GFX11-DENORM-STRICT-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_clause 0x2 -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[6:7] +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v3, v1, s[4:5] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v1.l, v0.l, v0.h -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v2, v1, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v2.l, v3.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: fmuladd_f16: @@ -159,15 +159,15 @@ define amdgpu_kernel void @fmuladd_f16(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-DENORM-CONTRACT-TRUE16-LABEL: fmuladd_f16: ; GFX11-DENORM-CONTRACT-TRUE16: ; %bb.0: ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_clause 0x2 -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[6:7] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v3, v1, s[4:5] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; 
GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v1.l, v0.l, v0.h -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v2, v1, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v2.l, v3.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fmuladd_f16: @@ -282,18 +282,18 @@ define amdgpu_kernel void @fmul_fadd_f16(ptr addrspace(1) %out, ptr addrspace(1) ; GFX11-FLUSH-TRUE16-LABEL: fmul_fadd_f16: ; GFX11-FLUSH-TRUE16: ; %bb.0: ; GFX11-FLUSH-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: s_clause 0x2 -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[4:5] +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v3, v1, s[6:7] ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v3.l +; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: fmul_fadd_f16: @@ -316,18 +316,18 @@ define amdgpu_kernel void @fmul_fadd_f16(ptr addrspace(1) %out, ptr addrspace(1) ; GFX11-DENORM-STRICT-TRUE16-LABEL: fmul_fadd_f16: ; GFX11-DENORM-STRICT-TRUE16: ; %bb.0: ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b256 
s[0:7], s[4:5], 0x24 -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_clause 0x2 -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[4:5] +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v3, v1, s[6:7] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v3.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: fmul_fadd_f16: @@ -350,15 +350,15 @@ define amdgpu_kernel void @fmul_fadd_f16(ptr addrspace(1) %out, ptr addrspace(1) ; GFX11-DENORM-CONTRACT-TRUE16-LABEL: fmul_fadd_f16: ; GFX11-DENORM-CONTRACT-TRUE16: ; %bb.0: ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_clause 0x2 -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] -; 
GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[6:7] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v3, v1, s[4:5] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v1.l, v0.l, v0.h -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v2, v1, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v2.l, v3.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fmul_fadd_f16: @@ -458,18 +458,18 @@ define amdgpu_kernel void @fmul_fadd_contract_f16(ptr addrspace(1) %out, ptr add ; GFX11-FLUSH-TRUE16-LABEL: fmul_fadd_contract_f16: ; GFX11-FLUSH-TRUE16: ; %bb.0: ; GFX11-FLUSH-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: s_clause 0x2 -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[4:5] +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v3, v1, s[6:7] ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v3.l +; GFX11-FLUSH-TRUE16-NEXT: 
global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: fmul_fadd_contract_f16: @@ -492,15 +492,15 @@ define amdgpu_kernel void @fmul_fadd_contract_f16(ptr addrspace(1) %out, ptr add ; GFX11-DENORM-STRICT-TRUE16-LABEL: fmul_fadd_contract_f16: ; GFX11-DENORM-STRICT-TRUE16: ; %bb.0: ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_clause 0x2 -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[6:7] +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v3, v1, s[4:5] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v1.l, v0.l, v0.h -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v2, v1, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v2.l, v3.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: fmul_fadd_contract_f16: @@ -520,15 +520,15 @@ define amdgpu_kernel void @fmul_fadd_contract_f16(ptr addrspace(1) %out, ptr add ; GFX11-DENORM-CONTRACT-TRUE16-LABEL: fmul_fadd_contract_f16: ; GFX11-DENORM-CONTRACT-TRUE16: ; %bb.0: ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_clause 0x2 -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: 
global_load_d16_b16 v0, v2, s[2:3] -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[6:7] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[6:7] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v3, v1, s[4:5] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v1.l, v0.l, v0.h -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v2, v1, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v2.l, v3.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fmul_fadd_contract_f16: @@ -625,13 +625,13 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -657,14 +657,14 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; 
GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 2.0, v0.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 2.0, v1.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: fmuladd_2.0_a_b_f16: @@ -687,14 +687,14 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v0, 
s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 2.0, v0.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 2.0, v1.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fmuladd_2.0_a_b_f16: @@ -795,13 +795,13 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -827,14 +827,14 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 
v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 2.0, v0.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 2.0, v1.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: fmuladd_a_2.0_b_f16: @@ -857,14 +857,14 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 2.0, v0.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 2.0, v1.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; 
GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fmuladd_a_2.0_b_f16: @@ -979,13 +979,13 @@ define amdgpu_kernel void @fadd_a_a_b_f16(ptr addrspace(1) %out, ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -1013,13 +1013,13 @@ define amdgpu_kernel void @fadd_a_a_b_f16(ptr addrspace(1) %out, ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h +; 
GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v2.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; @@ -1045,14 +1045,14 @@ define amdgpu_kernel void @fadd_a_a_b_f16(ptr addrspace(1) %out, ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 2.0, v0.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 2.0, v1.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fadd_a_a_b_f16: @@ -1170,13 +1170,13 @@ define amdgpu_kernel void @fadd_b_a_a_f16(ptr addrspace(1) %out, ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] 
offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v2.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -1204,13 +1204,13 @@ define amdgpu_kernel void @fadd_b_a_a_f16(ptr addrspace(1) %out, ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v2.l, v0.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; @@ -1236,14 +1236,14 @@ define amdgpu_kernel void @fadd_b_a_a_f16(ptr addrspace(1) %out, ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; 
GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 2.0, v0.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 2.0, v1.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fadd_b_a_a_f16: @@ -1347,13 +1347,13 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f16(ptr addrspace(1) %out, ptr ad ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.h, v0.l +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v2.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -1379,14 +1379,14 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f16(ptr addrspace(1) %out, ptr ad ; 
GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, -2.0, v0.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, -2.0, v1.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: fmuladd_neg_2.0_a_b_f16: @@ -1409,14 +1409,14 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f16(ptr addrspace(1) %out, ptr ad ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 
glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, -2.0, v0.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, -2.0, v1.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fmuladd_neg_2.0_a_b_f16: @@ -1517,13 +1517,13 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(ptr addrspace(1) %out, pt ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v2.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -1549,14 +1549,14 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(ptr addrspace(1) %out, pt ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; 
GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 2.0, v0.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 2.0, v1.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: fmuladd_neg_2.0_neg_a_b_f16: @@ -1579,14 +1579,14 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(ptr addrspace(1) %out, pt ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 2.0, v0.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; 
GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 2.0, v1.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fmuladd_neg_2.0_neg_a_b_f16: @@ -1689,13 +1689,13 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr ad ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.h, v0.l +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v2.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -1721,14 +1721,14 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr ad ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, 
s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, -2.0, v0.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, -2.0, v1.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: fmuladd_2.0_neg_a_b_f16: @@ -1751,14 +1751,14 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr ad ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, -2.0, v0.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, -2.0, v1.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fmuladd_2.0_neg_a_b_f16: @@ -1861,13 +1861,13 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(ptr addrspace(1) %out, ptr ad ; 
GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -1895,11 +1895,11 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(ptr addrspace(1) %out, ptr ad ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, 2.0, -v0.h +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, 2.0, -v2.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; @@ -1925,11 +1925,11 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(ptr addrspace(1) %out, ptr ad ; 
GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, 2.0, -v0.h +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, 2.0, -v2.l ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; @@ -2065,18 +2065,18 @@ define amdgpu_kernel void @mad_sub_f16(ptr addrspace(1) noalias nocapture %out, ; GFX11-FLUSH-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; 
GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v1.l -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v3.l +; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: mad_sub_f16: @@ -2103,18 +2103,18 @@ define amdgpu_kernel void @mad_sub_f16(ptr addrspace(1) noalias nocapture %out, ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v1.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v3.l +; 
GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: mad_sub_f16: @@ -2141,16 +2141,16 @@ define amdgpu_kernel void @mad_sub_f16(ptr addrspace(1) noalias nocapture %out, ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v0.h, -v1.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v2.l, -v3.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: mad_sub_f16: @@ -2289,18 +2289,18 @@ define amdgpu_kernel void @mad_sub_inv_f16(ptr addrspace(1) noalias nocapture %o ; GFX11-FLUSH-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v1.l, v0.l -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v3.l, v0.l +; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: mad_sub_inv_f16: @@ -2327,18 +2327,18 @@ define amdgpu_kernel void @mad_sub_inv_f16(ptr addrspace(1) noalias nocapture %o ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: 
global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e32 v0.l, v1.l, v0.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e32 v0.l, v3.l, v0.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: mad_sub_inv_f16: @@ -2365,16 +2365,16 @@ define amdgpu_kernel void @mad_sub_inv_f16(ptr addrspace(1) noalias nocapture %o ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v1, 
v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v0.h, v1.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v2.l, v3.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: mad_sub_inv_f16: @@ -2513,18 +2513,18 @@ define amdgpu_kernel void @mad_sub_fabs_f16(ptr addrspace(1) noalias nocapture % ; GFX11-FLUSH-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e64 v0.l, v0.l, |v1.l| -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e64 v0.l, v0.l, |v3.l| +; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, 
s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: mad_sub_fabs_f16: @@ -2551,18 +2551,18 @@ define amdgpu_kernel void @mad_sub_fabs_f16(ptr addrspace(1) noalias nocapture % ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e64 v0.l, v0.l, |v1.l| -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e64 v0.l, v0.l, |v3.l| +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: mad_sub_fabs_f16: @@ -2589,16 +2589,16 @@ define amdgpu_kernel void @mad_sub_fabs_f16(ptr addrspace(1) noalias nocapture % ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; 
GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v0.h, -|v1.l| -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v2.l, -|v3.l| +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: mad_sub_fabs_f16: @@ -2738,18 +2738,18 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f16(ptr addrspace(1) noalias nocaptu ; GFX11-FLUSH-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: 
s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e64 v0.l, |v1.l|, v0.l -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e64 v0.l, |v3.l|, v0.l +; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: mad_sub_fabs_inv_f16: @@ -2776,18 +2776,18 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f16(ptr addrspace(1) noalias nocaptu ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 
v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e64 v0.l, |v1.l|, v0.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e64 v0.l, |v3.l|, v0.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: mad_sub_fabs_inv_f16: @@ -2814,16 +2814,16 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f16(ptr addrspace(1) noalias nocaptu ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v0.h, |v1.l| -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; 
GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v2.l, |v3.l| +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: mad_sub_fabs_inv_f16: @@ -2963,18 +2963,18 @@ define amdgpu_kernel void @neg_neg_mad_f16(ptr addrspace(1) noalias nocapture %o ; GFX11-FLUSH-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v1.l, v0.l -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v3.l, v0.l +; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: neg_neg_mad_f16: @@ -3001,18 +3001,18 @@ define amdgpu_kernel void @neg_neg_mad_f16(ptr addrspace(1) noalias nocapture %o ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 
v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v0.h +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v2.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v1.l, v0.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v3.l, v0.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: neg_neg_mad_f16: @@ -3039,16 +3039,16 @@ define amdgpu_kernel void @neg_neg_mad_f16(ptr addrspace(1) noalias nocapture %o ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v0, s[2:3] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v3, v0, s[2:3] offset:4 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v1.l, v0.l, v0.h -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v2, v1, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v3.l, v1.l, v2.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v0, v3, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: neg_neg_mad_f16: @@ -3189,18 +3189,18 @@ define amdgpu_kernel void @mad_fabs_sub_f16(ptr addrspace(1) noalias nocapture % ; GFX11-FLUSH-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: 
global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, |v0.h| +; GFX11-FLUSH-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, |v2.l| ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v1.l -; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v3.l +; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: mad_fabs_sub_f16: @@ -3227,18 +3227,18 @@ define amdgpu_kernel void @mad_fabs_sub_f16(ptr addrspace(1) noalias nocapture % ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, |v0.h| +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, |v2.l| ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: 
v_sub_f16_e32 v0.l, v0.l, v1.l -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v3.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-STRICT-FAKE16-LABEL: mad_fabs_sub_f16: @@ -3265,16 +3265,16 @@ define amdgpu_kernel void @mad_fabs_sub_f16(ptr addrspace(1) noalias nocapture % ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[2:3] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[2:3] offset:4 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v3, v1, s[2:3] offset:4 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, |v0.h|, -v1.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, |v2.l|, -v3.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: mad_fabs_sub_f16: @@ -3396,13 +3396,13 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f16(ptr addrspace(1) %out, 
ptr addrsp ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.h, v0.l +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v2.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -3430,13 +3430,13 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.h, v0.l +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e32 v0.l, v2.l, v0.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm 
; @@ -3462,14 +3462,14 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v0.h, -2.0, v0.l -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fmac_f16_e32 v2.l, -2.0, v1.l +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-CONTRACT-FAKE16-LABEL: fsub_c_fadd_a_a_f16: @@ -3586,13 +3586,13 @@ define amdgpu_kernel void @fsub_fadd_a_a_c_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FLUSH-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-FLUSH-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) 
; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v0.h +; GFX11-FLUSH-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v2.l ; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; @@ -3620,13 +3620,13 @@ define amdgpu_kernel void @fsub_fadd_a_a_c_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-STRICT-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-DENORM-STRICT-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v0.h +; GFX11-DENORM-STRICT-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v2.l ; GFX11-DENORM-STRICT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-STRICT-TRUE16-NEXT: s_endpgm ; @@ -3654,11 +3654,11 @@ define amdgpu_kernel void @fsub_fadd_a_a_c_f16(ptr addrspace(1) %out, ptr addrsp ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: 
global_load_d16_hi_b16 v0, v1, s[0:1] offset:2 glc dlc +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_load_u16 v2, v1, s[0:1] offset:2 glc dlc ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, 2.0, -v0.h +; GFX11-DENORM-CONTRACT-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, 2.0, -v2.l ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-DENORM-CONTRACT-TRUE16-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll index 64a9727330cfd..a3fa2f46538d2 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll @@ -399,7 +399,7 @@ define amdgpu_kernel void @v_fneg_fabs_bf16(ptr addrspace(1) %out, ptr addrspace ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x8000, v0 ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index 9d9a851a5507e..46212d8312d90 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -348,7 +348,7 @@ define amdgpu_kernel void @v_fneg_fabs_f16(ptr addrspace(1) %out, ptr addrspace( ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x8000, v0 ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/fneg.bf16.ll 
b/llvm/test/CodeGen/AMDGPU/fneg.bf16.ll index d232693b46ad9..eada9d55a75b2 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg.bf16.ll @@ -134,7 +134,7 @@ define amdgpu_kernel void @v_fneg_bf16(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -303,51 +303,28 @@ define amdgpu_kernel void @v_fneg_fold_bf16(ptr addrspace(1) %out, ptr addrspace ; GFX9-NEXT: global_store_short_d16_hi v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm ; -; GFX11-TRUE16-LABEL: v_fneg_fold_bf16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v1, v0, s[2:3] -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_xor_b32_e32 v2, 0x8000, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-TRUE16-NEXT: v_mul_f32_e32 v1, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: global_store_d16_hi_b16 v0, 
v1, s[0:1] -; GFX11-TRUE16-NEXT: s_endpgm -; -; GFX11-FAKE16-LABEL: v_fneg_fold_bf16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x8000, v1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX11-FAKE16-NEXT: v_mul_f32_e32 v1, v2, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-FAKE16-NEXT: global_store_d16_hi_b16 v0, v1, s[0:1] -; GFX11-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: v_fneg_fold_bf16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX11-NEXT: v_add_nc_u32_e32 v2, v2, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; 
GFX11-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 +; GFX11-NEXT: v_or_b32_e32 v3, 0x400000, v1 +; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo +; GFX11-NEXT: global_store_d16_hi_b16 v0, v1, s[0:1] +; GFX11-NEXT: s_endpgm %val = load bfloat, ptr addrspace(1) %in %fsub = fsub bfloat -0.0, %val %fmul = fmul bfloat %fsub, %val diff --git a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll index cab27fca5ab0a..b0213dd33ee36 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll @@ -132,7 +132,7 @@ define amdgpu_kernel void @v_fneg_f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -285,7 +285,7 @@ define amdgpu_kernel void @v_fneg_fold_f16(ptr addrspace(1) %out, ptr addrspace( ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll index f2fe61f5376e4..9ff5b19d711e9 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -306,7 +306,7 @@ ret: ; GFX11-TRUE16-LABEL: tied_operand_test: ; GFX11-TRUE16: ; %bb.0: ; %entry -; GFX11-TRUE16: scratch_load_d16_b16 
[[LDRESULT:v[0-9]+]], off, off +; GFX11-TRUE16: scratch_load_u16 [[LDRESULT:v[0-9]+]], off, off ; GFX11-TRUE16: v_mov_b16_e32 [[C:v[0-9]]].{{(l|h)}}, 0x7b ; GFX11-TRUE16-DAG: ds_store_b16 v{{[0-9]+}}, [[LDRESULT]] offset:10 ; GFX11-TRUE16-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll index 308e86bbaf8fd..350d93e3e55e8 100644 --- a/llvm/test/CodeGen/AMDGPU/freeze.ll +++ b/llvm/test/CodeGen/AMDGPU/freeze.ll @@ -5563,29 +5563,13 @@ define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { ; GFX10-NEXT: global_store_short v[2:3], v0, off ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: freeze_i16: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: freeze_i16: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_i16: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: freeze_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b16 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] %a = load i16, ptr addrspace(1) %ptra %freeze = freeze i16 %a 
store i16 %freeze, ptr addrspace(1) %ptrb @@ -6214,29 +6198,13 @@ define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { ; GFX10-NEXT: global_store_short v[2:3], v0, off ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: freeze_f16: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: freeze_f16: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_f16: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: freeze_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b16 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] %a = load half, ptr addrspace(1) %ptra %freeze = freeze half %a store half %freeze, ptr addrspace(1) %ptrb @@ -6871,29 +6839,13 @@ define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { ; GFX10-NEXT: global_store_short v[2:3], v0, off ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: freeze_bf16: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, 
v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: freeze_bf16: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_bf16: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: freeze_bf16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b16 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] %a = load bfloat, ptr addrspace(1) %ptra %freeze = freeze bfloat %a store bfloat %freeze, ptr addrspace(1) %ptrb @@ -12151,29 +12103,13 @@ define void @freeze_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { ; GFX10-NEXT: global_store_byte v[2:3], v0, off ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: freeze_i8: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: freeze_i8: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; 
GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_i8: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: freeze_i8: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] %a = load i8, ptr addrspace(1) %ptra %freeze = freeze i8 %a store i8 %freeze, ptr addrspace(1) %ptrb @@ -12287,21 +12223,13 @@ define void @freeze_v2i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { ; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: freeze_v2i8: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: freeze_v2i8: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: freeze_v2i8: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v0, off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX11-GISEL-LABEL: freeze_v2i8: ; GFX11-GISEL: ; %bb.0: @@ -13451,7 +13379,7 @@ define void @freeze_v2i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { ; GFX11-SDAG-TRUE16-LABEL: freeze_v2i1: ; GFX11-SDAG-TRUE16: ; %bb.0: ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off +; GFX11-SDAG-TRUE16-NEXT: global_load_u8 v0, v[0:1], off ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, v0.l, 3 ; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off @@ -13626,7 +13554,7 @@ define void @freeze_v3i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { ; GFX11-SDAG-TRUE16-LABEL: freeze_v3i1: ; GFX11-SDAG-TRUE16: ; %bb.0: ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off +; GFX11-SDAG-TRUE16-NEXT: global_load_u8 v0, v[0:1], off ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, v0.l, 7 ; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index 20009aee6e7ff..b4098fb96fed6 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -525,23 +525,23 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v1, v0, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[4:5] offset:8 +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] offset:8 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e64 v3, 
|v1.l| +; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e64 v3, |v0.l| ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e64 v2, |v0.l| +; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e64 v2, |v1.l| ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v3, v2 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB0_2 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else -; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, 0, v1 +; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, 0, v0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v3, v2 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, v1.l, v4.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, v0.l, v4.l, vcc_lo ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB0_3 ; GFX11-TRUE16-NEXT: s_branch .LBB0_8 ; GFX11-TRUE16-NEXT: .LBB0_2: @@ -622,10 +622,10 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2 -; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v2, v1 +; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v2, v0 ; GFX11-TRUE16-NEXT: .LBB0_8: ; %Flow19 -; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v0.l -; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v1.l| +; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v1.l +; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v0.l| ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -748,122 +748,124 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1 ; GFX1150-TRUE16-LABEL: frem_f16: ; GFX1150-TRUE16: ; %bb.0: ; GFX1150-TRUE16-NEXT: s_clause 0x1 -; GFX1150-TRUE16-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 -; GFX1150-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 -; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; 
GFX1150-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1150-TRUE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x34 +; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v0, 0 ; GFX1150-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX1150-TRUE16-NEXT: s_clause 0x1 -; GFX1150-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[10:11] -; GFX1150-TRUE16-NEXT: global_load_d16_b16 v1, v1, s[0:1] offset:8 +; GFX1150-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] +; GFX1150-TRUE16-NEXT: global_load_u16 v0, v0, s[6:7] offset:8 ; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX1150-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1150-TRUE16-NEXT: v_readfirstlane_b32 s4, v1 ; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX1150-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 -; GFX1150-TRUE16-NEXT: s_and_b32 s0, s0, 0x7fff -; GFX1150-TRUE16-NEXT: s_and_b32 s2, s1, 0x7fff -; GFX1150-TRUE16-NEXT: s_cvt_f32_f16 s1, s0 -; GFX1150-TRUE16-NEXT: s_cvt_f32_f16 s0, s2 +; GFX1150-TRUE16-NEXT: v_readfirstlane_b32 s3, v0 +; GFX1150-TRUE16-NEXT: s_and_b32 s2, s4, 0x7fff +; GFX1150-TRUE16-NEXT: s_and_b32 s5, s3, 0x7fff +; GFX1150-TRUE16-NEXT: s_cvt_f32_f16 s6, s2 +; GFX1150-TRUE16-NEXT: s_cvt_f32_f16 s5, s5 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) -; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s1, s0 +; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s6, s5 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB0_2 ; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else -; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s1, s0 -; GFX1150-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, v0 -; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 +; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s6, s5 +; GFX1150-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s4 +; GFX1150-TRUE16-NEXT: s_cselect_b32 s8, -1, 0 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1150-TRUE16-NEXT: v_cndmask_b16 v2.l, v0.l, v2.l, s3 +; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.l, s4, v0.l, s8 ; GFX1150-TRUE16-NEXT: s_cbranch_execz .LBB0_3 ; GFX1150-TRUE16-NEXT: s_branch .LBB0_8 ; GFX1150-TRUE16-NEXT: .LBB0_2: -; 
GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr0 ; GFX1150-TRUE16-NEXT: .LBB0_3: ; %frem.compute -; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s0 -; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s1 -; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s1 +; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s5 +; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s6 +; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1150-TRUE16-NEXT: v_ldexp_f32 v3, v3, 1 -; GFX1150-TRUE16-NEXT: v_ldexp_f32 v4, v2, 11 -; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v2, s0 +; GFX1150-TRUE16-NEXT: v_ldexp_f32 v1, v1, 1 +; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v0, 11 +; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v0, s5 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1150-TRUE16-NEXT: v_readfirstlane_b32 s1, v5 -; GFX1150-TRUE16-NEXT: v_div_scale_f32 v7, null, v3, v3, 1.0 +; GFX1150-TRUE16-NEXT: v_readfirstlane_b32 s6, v3 +; GFX1150-TRUE16-NEXT: v_div_scale_f32 v5, null, v1, v1, 1.0 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX1150-TRUE16-NEXT: v_readfirstlane_b32 s0, v2 -; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v2, -1, v2 -; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v8, v7 +; GFX1150-TRUE16-NEXT: v_readfirstlane_b32 s5, v0 +; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v0, -1, v0 +; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v6, v5 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_not_b32_e32 v6, v2 -; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v6, v6, v5 -; GFX1150-TRUE16-NEXT: v_div_scale_f32 v5, vcc_lo, 1.0, v3, 1.0 +; GFX1150-TRUE16-NEXT: v_not_b32_e32 v4, v0 +; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v4, v4, v3 +; GFX1150-TRUE16-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v1, 1.0 ; GFX1150-TRUE16-NEXT: 
s_denorm_mode 15 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_fma_f32 v9, -v7, v8, 1.0 -; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v8, v9, v8 +; GFX1150-TRUE16-NEXT: v_fma_f32 v7, -v5, v6, 1.0 +; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v6, v7, v6 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v9, v5, v8 -; GFX1150-TRUE16-NEXT: v_fma_f32 v10, -v7, v9, v5 +; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v7, v3, v6 +; GFX1150-TRUE16-NEXT: v_fma_f32 v8, -v5, v7, v3 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v9, v10, v8 -; GFX1150-TRUE16-NEXT: v_fma_f32 v5, -v7, v9, v5 +; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v7, v8, v6 +; GFX1150-TRUE16-NEXT: v_fma_f32 v3, -v5, v7, v3 ; GFX1150-TRUE16-NEXT: s_denorm_mode 12 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1150-TRUE16-NEXT: v_div_fmas_f32 v5, v5, v8, v9 -; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 -; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 +; GFX1150-TRUE16-NEXT: v_div_fmas_f32 v3, v3, v6, v7 +; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 +; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB0_7 ; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader -; GFX1150-TRUE16-NEXT: s_sub_i32 s0, s1, s0 +; GFX1150-TRUE16-NEXT: s_sub_i32 s5, s6, s5 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1150-TRUE16-NEXT: s_add_i32 s0, s0, 11 +; GFX1150-TRUE16-NEXT: s_add_i32 s5, s5, 11 ; GFX1150-TRUE16-NEXT: .LBB0_5: ; %frem.loop_body ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v7, v4 -; GFX1150-TRUE16-NEXT: 
s_add_i32 s0, s0, -11 -; GFX1150-TRUE16-NEXT: s_cmp_gt_i32 s0, 11 +; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v2 +; GFX1150-TRUE16-NEXT: s_add_i32 s5, s5, -11 +; GFX1150-TRUE16-NEXT: s_cmp_gt_i32 s5, 11 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v4, v7, v5 -; GFX1150-TRUE16-NEXT: v_rndne_f32_e32 v4, v4 +; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v2, v5, v3 +; GFX1150-TRUE16-NEXT: v_rndne_f32_e32 v2, v2 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v4, 0x80000000, v4 -; GFX1150-TRUE16-NEXT: v_fma_f32 v4, v4, v3, v7 +; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; GFX1150-TRUE16-NEXT: v_fma_f32 v2, v2, v1, v5 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v4 -; GFX1150-TRUE16-NEXT: v_add_f32_e32 v6, v4, v3 -; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo +; GFX1150-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v2 +; GFX1150-TRUE16-NEXT: v_add_f32_e32 v4, v2, v1 +; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_ldexp_f32 v4, v4, 11 +; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v2, 11 ; GFX1150-TRUE16-NEXT: s_cbranch_scc1 .LBB0_5 ; GFX1150-TRUE16-NEXT: ; %bb.6: ; %Flow -; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, s0 -; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, v7 +; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, s5 +; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, v5 ; GFX1150-TRUE16-NEXT: .LBB0_7: ; %frem.loop_exit ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 -; GFX1150-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6 +; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 +; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4 ; 
GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v5, v4, v5 -; GFX1150-TRUE16-NEXT: v_rndne_f32_e32 v5, v5 +; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v3, v2, v3 +; GFX1150-TRUE16-NEXT: v_rndne_f32_e32 v3, v3 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v5, 0x80000000, v5 -; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v4, v5, v3 +; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 +; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v2, v3, v1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v4 -; GFX1150-TRUE16-NEXT: v_add_f32_e32 v3, v4, v3 -; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo +; GFX1150-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v2 +; GFX1150-TRUE16-NEXT: v_add_f32_e32 v1, v2, v1 +; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2 -; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2 +; GFX1150-TRUE16-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v0 +; GFX1150-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, s4 ; GFX1150-TRUE16-NEXT: .LBB0_8: ; %Flow19 -; GFX1150-TRUE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x7fff, v0 -; GFX1150-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v1.l -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1150-TRUE16-NEXT: v_cmp_nle_f16_e64 s0, 0x7c00, v0.l -; GFX1150-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo -; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v2.l, s0 -; GFX1150-TRUE16-NEXT: global_store_b16 v3, v0, s[8:9] +; GFX1150-TRUE16-NEXT: 
s_cmp_lg_f16 s3, 0 +; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 +; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s2, 0x7c00 +; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1150-TRUE16-NEXT: s_and_b32 s2, s2, s3 +; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s2 +; GFX1150-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX1150-TRUE16-NEXT: s_endpgm ; ; GFX1150-FAKE16-LABEL: frem_f16: @@ -1424,37 +1426,35 @@ define amdgpu_kernel void @fast_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[4:5] offset:8 +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v3, v1, s[4:5] offset:8 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l +; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v2.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v1.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v3.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v4, v4 ; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v4 -; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v5, v3, v6 op_sel_hi:[1,0,1] +; GFX11-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v4 +; 
GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v3, v0, v2 op_sel_hi:[1,0,1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v3, v7, v4 -; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, v6 op_sel_hi:[1,0,1] +; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v0, v5, v4 +; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v3, v0, v2 op_sel_hi:[1,0,1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_mul_f32_e32 v4, v5, v4 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff800000, v4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v3, v4, v3 -; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, v4, v0 +; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.l, v0.l -; GFX11-TRUE16-NEXT: v_trunc_f16_e32 v0.h, v0.h +; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v3.l, v2.l +; GFX11-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.h, v1.l, v0.l -; GFX11-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v3.l, v2.l +; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: fast_frem_f16: @@ -1501,35 +1501,34 @@ define amdgpu_kernel void @fast_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, 0 ; GFX1150-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX1150-TRUE16-NEXT: s_clause 0x1 -; GFX1150-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX1150-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[4:5] offset:8 +; GFX1150-TRUE16-NEXT: global_load_u16 v3, v2, s[2:3] +; GFX1150-TRUE16-NEXT: global_load_u16 v4, v2, s[4:5] offset:8 
; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l +; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v3.l ; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v1.l -; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l -; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(TRANS32_DEP_1) -; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v4, v4 -; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v4 +; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v4.l +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1) +; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v1, v1 +; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v5, v3, v6 op_sel_hi:[1,0,1] -; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v3, v7, v4 +; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v5, -v4, v0, v3 op_sel_hi:[1,0,1] +; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v0, v5, v1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, v6 op_sel_hi:[1,0,1] -; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v4, v5, v4 +; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v5, -v4, v0, v3 op_sel_hi:[1,0,1] +; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v1, v5, v1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_and_b32_e32 v4, 0xff800000, v4 -; GFX1150-TRUE16-NEXT: v_add_f32_e32 v3, v4, v3 -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3 -; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.l, v0.l +; GFX1150-TRUE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1 +; GFX1150-TRUE16-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX1150-TRUE16-NEXT: v_mov_b16_e32 
v1.l, v3.l +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 +; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v4.l, v1.l ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v3.l, v0.h -; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v3, 0x8000, v3 +; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l +; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v3.l, v1.l -; GFX1150-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v3.l, v0.l, v4.l +; GFX1150-TRUE16-NEXT: global_store_b16 v2, v3, s[0:1] ; GFX1150-TRUE16-NEXT: s_endpgm ; ; GFX1150-FAKE16-LABEL: fast_frem_f16: @@ -1831,37 +1830,35 @@ define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace( ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[4:5] offset:8 +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v3, v1, s[4:5] offset:8 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l +; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v2.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v1.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v3.l +; 
GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_rcp_f32_e32 v4, v4 ; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff -; GFX11-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v4 -; GFX11-TRUE16-NEXT: v_fma_mix_f32 v7, -v5, v3, v6 op_sel_hi:[1,0,1] +; GFX11-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v4 +; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v3, v0, v2 op_sel_hi:[1,0,1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v3, v7, v4 -; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, v6 op_sel_hi:[1,0,1] +; GFX11-TRUE16-NEXT: v_fmac_f32_e32 v0, v5, v4 +; GFX11-TRUE16-NEXT: v_fma_mix_f32 v5, -v3, v0, v2 op_sel_hi:[1,0,1] ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_mul_f32_e32 v4, v5, v4 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xff800000, v4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v3, v4, v3 -; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, v4, v0 +; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.l, v0.l -; GFX11-TRUE16-NEXT: v_trunc_f16_e32 v0.h, v0.h +; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v3.l, v2.l +; GFX11-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.h, v1.l, v0.l -; GFX11-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v3.l, v2.l +; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: unsafe_frem_f16: @@ -1908,35 +1905,34 @@ define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace( ; GFX1150-TRUE16-NEXT: 
v_mov_b32_e32 v2, 0 ; GFX1150-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX1150-TRUE16-NEXT: s_clause 0x1 -; GFX1150-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX1150-TRUE16-NEXT: global_load_d16_b16 v1, v2, s[4:5] offset:8 +; GFX1150-TRUE16-NEXT: global_load_u16 v3, v2, s[2:3] +; GFX1150-TRUE16-NEXT: global_load_u16 v4, v2, s[4:5] offset:8 ; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l +; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v3.l ; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v1.l -; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l -; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(TRANS32_DEP_1) -; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v4, v4 -; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v4 +; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v4.l +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1) +; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v1, v1 +; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v5, v3, v6 op_sel_hi:[1,0,1] -; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v3, v7, v4 +; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v5, -v4, v0, v3 op_sel_hi:[1,0,1] +; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v0, v5, v1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v5, -v5, v3, v6 op_sel_hi:[1,0,1] -; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v4, v5, v4 +; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v5, -v4, v0, v3 op_sel_hi:[1,0,1] +; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v1, v5, v1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_and_b32_e32 v4, 0xff800000, v4 -; GFX1150-TRUE16-NEXT: v_add_f32_e32 v3, v4, v3 -; 
GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3 -; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v1.l, v0.l +; GFX1150-TRUE16-NEXT: v_and_b32_e32 v1, 0xff800000, v1 +; GFX1150-TRUE16-NEXT: v_add_f32_e32 v0, v1, v0 +; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 +; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v4.l, v1.l ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v3.l, v0.h -; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v3, 0x8000, v3 +; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l +; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v3.l, v1.l -; GFX1150-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v3.l, v0.l, v4.l +; GFX1150-TRUE16-NEXT: global_store_b16 v2, v3, s[0:1] ; GFX1150-TRUE16-NEXT: s_endpgm ; ; GFX1150-FAKE16-LABEL: unsafe_frem_f16: diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll index 3c41cc43a089e..5f083fcd2b6d0 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args.ll @@ -1648,81 +1648,86 @@ define void @void_func_v32i8(<32 x i8> %arg0) #0 { ; GFX11-TRUE16-LABEL: void_func_v32i8: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v31, off, s32 +; GFX11-TRUE16-NEXT: scratch_load_u8 v31, off, s32 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, 0 -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v15.l -; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v14.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v2.h, 8, v13.l +; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v6.h, v1.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.l, 8, v27.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v26.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v25.l +; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v24.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v32.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v3.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v2.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v3.l, 8, v23.l +; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v22.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v2.l, 8, v21.l +; GFX11-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v20.l +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_or_b16 v24.l, v1.h, v1.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v32.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v7.h, 8, v19.l +; GFX11-TRUE16-NEXT: v_and_b16 v9.h, 0xff, v18.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v10.h, 8, v17.l +; GFX11-TRUE16-NEXT: v_and_b16 v11.h, 0xff, v16.l +; GFX11-TRUE16-NEXT: v_or_b16 v20.l, v2.h, v2.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v24, v32 +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v3.h, v3.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v32.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v12.h, 8, v29.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v28.l +; GFX11-TRUE16-NEXT: v_and_b16 v13.h, 0xff, v30.l +; GFX11-TRUE16-NEXT: v_or_b16 v16.l, v11.h, v10.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v20, v32 +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v9.h, v7.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v32.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v14.h, 8, v15.l +; GFX11-TRUE16-NEXT: v_and_b16 v14.l, 0xff, v14.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v3.l, 8, v13.l ; GFX11-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v12.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v32.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v4.h, 8, v11.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v10.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v6.h, 8, v9.l -; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v8.l -; GFX11-TRUE16-NEXT: v_or_b16 
v32.h, v1.h, v0.h -; GFX11-TRUE16-NEXT: v_or_b16 v12.l, v3.h, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v32.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v7.l -; GFX11-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l +; GFX11-TRUE16-NEXT: v_or_b16 v17.l, v0.l, v12.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v16, v32 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v32.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v9.h, 8, v11.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v10.l +; GFX11-TRUE16-NEXT: v_and_b16 v8.l, 0xff, v8.l +; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v3.h, v3.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v32.l +; GFX11-TRUE16-NEXT: v_and_b16 v10.h, 0xff, v6.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v5.l ; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.l, 8, v3.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v7.h, v6.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v12, v32 -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v5.h, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l -; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v10.l, v4.l, v5.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, v9, v32 -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v6.l, v7.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v30.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v5.h, 8, v29.l -; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v28.l -; GFX11-TRUE16-NEXT: v_or_b16 v11.l, v0.l, v1.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v10, v32 -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v0.h, v8.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v32.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v4.l, 8, v27.l -; GFX11-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v26.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v25.l -; GFX11-TRUE16-NEXT: v_and_b16 v7.h, 0xff, v24.l -; GFX11-TRUE16-NEXT: v_or_b16 v13.l, v6.h, v5.h -; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v11, v32 -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v32.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.l, 8, v23.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v5.h, 8, v21.l -; GFX11-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v20.l -; GFX11-TRUE16-NEXT: v_or_b16 v14.l, v7.h, v6.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v32.l -; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v16.l -; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 16 -; GFX11-TRUE16-NEXT: v_or_b16 v15.l, v6.h, v5.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v32.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v4.h, 8, v4.h +; GFX11-TRUE16-NEXT: v_and_b16 v8.h, 0xff, v8.h +; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 0 +; GFX11-TRUE16-NEXT: v_or_b16 v19.l, v4.l, v5.l +; GFX11-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v5.h ; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v31.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v4.h, v7.l -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v22.l -; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v13, v32 -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v5.l, v4.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v4.l, 8, v19.l -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v17.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v14, v32 -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v4.h, v8.l -; GFX11-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v18.l -; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v8.h, v5.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v15, v32 -; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v4.h, v4.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v7.h, 8, v31.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v13.h, v7.h +; GFX11-TRUE16-NEXT: 
v_lshlrev_b16 v7.h, 8, v9.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v9.l, 8, v7.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, v17, v32 +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v14.l, v14.h +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_or_b16 v18.l, v8.l, v7.h +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v8.l, 8, v6.h +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, v15, v32 +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v10.l, v9.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v32.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, v18, v32 +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v10.h, v9.l +; GFX11-TRUE16-NEXT: v_or_b16 v9.l, v8.h, v8.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, v19, v32 +; GFX11-TRUE16-NEXT: v_or_b16 v32.h, v4.l, v4.h ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, v9, v32 ; GFX11-TRUE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 -; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 0 +; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], 16 ; GFX11-TRUE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -2669,33 +2674,19 @@ define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) % ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: void_func_byval_struct_i8_i32: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s32 offset:4 -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: buffer_store_b32 v1, off, s[0:3], 0 -; GFX11-TRUE16-NEXT: buffer_store_b8 v0, off, s[0:3], 0 -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] 
-; -; GFX11-FAKE16-LABEL: void_func_byval_struct_i8_i32: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x1 -; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_load_u8 v1, off, s32 -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_store_b8 v1, off, s[0:3], 0 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: void_func_byval_struct_i8_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4 +; GFX11-NEXT: scratch_load_u8 v1, off, s32 +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-NEXT: s_mov_b32 s2, -1 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0 store { i8, i32 } %arg0.load, ptr addrspace(1) poison ret void @@ -2779,55 +2770,30 @@ define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 } ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: void_func_byval_struct_i8_i32_x2: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v1, off, s32 glc dlc -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v2, off, s32 offset:8 glc dlc -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc -; GFX11-TRUE16-NEXT: 
s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 -; GFX11-TRUE16-NEXT: buffer_store_b32 v3, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b8 v2, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0 -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: void_func_byval_struct_i8_i32_x2: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: scratch_load_u8 v1, off, s32 glc dlc -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: scratch_load_b32 v2, off, s32 offset:4 glc dlc -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: scratch_load_u8 v3, off, s32 offset:8 glc dlc -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 -; GFX11-FAKE16-NEXT: buffer_store_b32 v2, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: void_func_byval_struct_i8_i32_x2: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: scratch_load_u8 v1, off, s32 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:4 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: scratch_load_u8 v3, off, s32 offset:8 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-NEXT: s_mov_b32 s2, -1 +; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: ds_store_b32 v0, v0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0 %arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1 store volatile { i8, i32 } %arg0.load, ptr addrspace(1) poison @@ -3032,99 +2998,52 @@ define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: void_func_v32i32_i1_i8_i16_bf16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x5 -; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_u8 v36, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v32, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v33, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v34, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v35, off, s32 offset:20 -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 -; GFX11-TRUE16-NEXT: 
s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 1, v36 -; GFX11-TRUE16-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-TRUE16-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-TRUE16-NEXT: buffer_store_b16 v33, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: void_func_v32i32_i1_i8_i16_bf16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x5 -; 
GFX11-FAKE16-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u8 v32, off, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_load_u16 v33, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u16 v34, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u16 v35, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u16 v36, off, s32 offset:20 -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-FAKE16-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 1, v32 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b8 v16, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-FAKE16-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-FAKE16-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: 
buffer_store_b16 v35, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_store_b16 v36, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: void_func_v32i32_i1_i8_i16_bf16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x5 +; GFX11-NEXT: scratch_load_b32 v31, off, s32 +; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:4 +; GFX11-NEXT: scratch_load_u16 v33, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u16 v34, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u16 v35, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u16 v36, off, s32 offset:20 +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-NEXT: s_mov_b32 s2, -1 +; GFX11-NEXT: s_waitcnt vmcnt(5) +; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(4) +; GFX11-NEXT: v_and_b32_e32 v16, 1, v32 +; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b8 v16, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(3) +; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: 
s_waitcnt vmcnt(2) +; GFX11-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: buffer_store_b16 v36, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] store volatile <32 x i32> %arg0, ptr addrspace(1) poison store volatile i1 %arg1, ptr addrspace(1) poison store volatile i8 %arg2, ptr addrspace(1) poison @@ -4536,185 +4455,95 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: void_func_v32i32_v16i8: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x10 -; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v32, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v33, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v34, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v35, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v36, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v37, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v38, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v39, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v48, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v49, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v50, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v51, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v52, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v53, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v54, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v55, 
off, s32 offset:4 -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-TRUE16-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-TRUE16-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-TRUE16-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-TRUE16-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-TRUE16-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-TRUE16-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-TRUE16-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-TRUE16-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-TRUE16-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-TRUE16-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-TRUE16-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-TRUE16-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-TRUE16-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-TRUE16-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-TRUE16-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: void_func_v32i32_v16i8: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x10 -; GFX11-FAKE16-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_u8 v32, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_u8 v33, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_u8 v34, off, s32 offset:56 -; GFX11-FAKE16-NEXT: 
scratch_load_u8 v35, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_u8 v36, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_u8 v37, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_u8 v38, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_u8 v39, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_u8 v48, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_u8 v49, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_u8 v50, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_u8 v51, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_u8 v52, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_u8 v53, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_u8 v54, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_u8 v55, off, s32 offset:4 -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(16) -; GFX11-FAKE16-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(15) -; GFX11-FAKE16-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; 
GFX11-FAKE16-NEXT: s_waitcnt vmcnt(14) -; GFX11-FAKE16-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(13) -; GFX11-FAKE16-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(12) -; GFX11-FAKE16-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(11) -; GFX11-FAKE16-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(10) -; GFX11-FAKE16-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(9) -; GFX11-FAKE16-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(8) -; GFX11-FAKE16-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(7) -; GFX11-FAKE16-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(6) -; GFX11-FAKE16-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(5) -; GFX11-FAKE16-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(4) -; GFX11-FAKE16-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(3) -; GFX11-FAKE16-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-FAKE16-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt 
vmcnt(1) -; GFX11-FAKE16-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: void_func_v32i32_v16i8: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x10 +; GFX11-NEXT: scratch_load_b32 v31, off, s32 +; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:64 +; GFX11-NEXT: scratch_load_u8 v33, off, s32 offset:60 +; GFX11-NEXT: scratch_load_u8 v34, off, s32 offset:56 +; GFX11-NEXT: scratch_load_u8 v35, off, s32 offset:52 +; GFX11-NEXT: scratch_load_u8 v36, off, s32 offset:48 +; GFX11-NEXT: scratch_load_u8 v37, off, s32 offset:44 +; GFX11-NEXT: scratch_load_u8 v38, off, s32 offset:40 +; GFX11-NEXT: scratch_load_u8 v39, off, s32 offset:36 +; GFX11-NEXT: scratch_load_u8 v48, off, s32 offset:32 +; GFX11-NEXT: scratch_load_u8 v49, off, s32 offset:28 +; GFX11-NEXT: scratch_load_u8 v50, off, s32 offset:24 +; GFX11-NEXT: scratch_load_u8 v51, off, s32 offset:20 +; GFX11-NEXT: scratch_load_u8 v52, off, s32 offset:16 +; GFX11-NEXT: scratch_load_u8 v53, off, s32 offset:12 +; GFX11-NEXT: scratch_load_u8 v54, off, s32 offset:8 +; GFX11-NEXT: scratch_load_u8 v55, off, s32 offset:4 +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-NEXT: s_mov_b32 s2, -1 +; GFX11-NEXT: s_waitcnt vmcnt(16) +; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 
0x0 +; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(15) +; GFX11-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(14) +; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(13) +; GFX11-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(12) +; GFX11-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(11) +; GFX11-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(10) +; GFX11-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(8) +; GFX11-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(7) +; GFX11-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(6) +; GFX11-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(5) +; GFX11-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(4) +; GFX11-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(3) +; GFX11-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 
0x0 +; GFX11-NEXT: s_waitcnt vmcnt(2) +; GFX11-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] store volatile <32 x i32> %arg0, ptr addrspace(1) poison store volatile <16 x i8> %arg1, ptr addrspace(1) poison ret void diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index f67ab18dd8ef1..5883e807964e4 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -706,63 +706,34 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_signext: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: global_load_d16_i8 v0, v[0:1], off glc dlc -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi -; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: 
v_readlane_b32 s31, v40, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 -; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: test_call_external_void_func_i8_signext: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: global_load_i8 v0, v[0:1], off glc dlc -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi -; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 -; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_call_external_void_func_i8_signext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: global_load_i8 v0, v[0:1], off glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext: ; GFX10-SCRATCH: ; %bb.0: @@ -857,63 +828,34 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_zeroext: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off glc dlc -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 
external_void_func_i8_zeroext@abs32@hi -; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 -; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: test_call_external_void_func_i8_zeroext: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi -; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 -; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 -; 
GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_call_external_void_func_i8_zeroext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext: ; GFX10-SCRATCH: ; %bb.0: @@ -1153,63 +1095,34 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_signext: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33 -; GFX11-TRUE16-NEXT: 
s_mov_b32 s33, s32 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off glc dlc -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi -; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 -; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: test_call_external_void_func_i16_signext: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off glc dlc -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi -; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: 
v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 -; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_call_external_void_func_i16_signext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: global_load_u16 v0, v[0:1], off glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext: ; GFX10-SCRATCH: ; %bb.0: @@ 
-1304,63 +1217,34 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_zeroext: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off glc dlc -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi -; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 -; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: test_call_external_void_func_i16_zeroext: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-FAKE16-NEXT: s_mov_b32 
exec_lo, s1 -; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off glc dlc -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi -; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 -; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_call_external_void_func_i16_zeroext: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: global_load_u16 v0, v[0:1], off glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; 
GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext: ; GFX10-SCRATCH: ; %bb.0: @@ -3245,71 +3129,38 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: test_call_external_void_func_v2i8: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi -; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 -; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-TRUE16-NEXT: s_mov_b32 
exec_lo, s1 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: test_call_external_void_func_v2i8: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi -; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 -; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_call_external_void_func_v2i8: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, 
v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i8: ; GFX10-SCRATCH: ; %bb.0: @@ -4297,77 +4148,41 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_ret: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; 
GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi -; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 -; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: global_store_b8 v[40:41], v0, off -; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 -; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v42, 2 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s33 offset:8 ; 4-byte Folded Reload -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: test_call_external_void_func_i8_ret: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: s_clause 0x1 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi -; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: 
s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: global_load_u8 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 -; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: global_store_b8 v[40:41], v0, off -; GFX11-FAKE16-NEXT: s_clause 0x1 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s33 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 -; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v42, 2 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s33 offset:8 ; 4-byte Folded Reload -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_call_external_void_func_i8_ret: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-NEXT: v_mov_b32_e32 v40, 0 +; GFX11-NEXT: v_mov_b32_e32 v41, 0 +; GFX11-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: global_load_u8 v0, v[40:41], off +; GFX11-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-NEXT: v_writelane_b32 v42, s31, 1 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: global_store_b8 v[40:41], v0, off +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: scratch_load_b32 v41, off, s33 +; GFX11-NEXT: 
scratch_load_b32 v40, off, s33 offset:4 +; GFX11-NEXT: v_readlane_b32 s31, v42, 1 +; GFX11-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: v_readlane_b32 s0, v42, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:8 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_ret: ; GFX10-SCRATCH: ; %bb.0: @@ -4512,7 +4327,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[40:41], off +; GFX11-TRUE16-NEXT: global_load_u16 v0, v[40:41], off ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) @@ -9157,71 +8972,38 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: test_call_external_void_func_struct_i8_i32: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: 
global_load_d16_u8 v0, v1, s[0:1] -; GFX11-TRUE16-NEXT: global_load_b32 v1, v1, s[0:1] offset:4 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi -; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 -; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: test_call_external_void_func_struct_i8_i32: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_clause 0x1 -; GFX11-FAKE16-NEXT: global_load_u8 v0, v1, s[0:1] -; GFX11-FAKE16-NEXT: global_load_b32 v1, v1, s[0:1] offset:4 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi -; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; 
GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 -; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 -; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_call_external_void_func_struct_i8_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_mov_b32 s0, s33 +; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_load_u8 v0, v1, s[0:1] +; GFX11-NEXT: global_load_b32 v1, v1, s[0:1] offset:4 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32: ; 
GFX10-SCRATCH: ; %bb.0: @@ -9540,7 +9322,7 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, off, s33 offset:8 +; GFX11-TRUE16-NEXT: scratch_load_u8 v0, off, s33 offset:8 ; GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s33 offset:12 ; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll index 63376def3d7e1..b361b85cfbd4a 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll @@ -87,7 +87,7 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() #0 { } ; GCN-LABEL: {{^}}queue_ptr: -; WORKAROUND-TRUE16-SDAG: global_load_d16_u8 +; WORKAROUND-TRUE16-SDAG: global_load_u8 ; WORKAROUND-FAKE16: global_load_u8 v{{[0-9]+}}, ; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15 @@ -129,9 +129,9 @@ define amdgpu_kernel void @queue_ptr() #1 { ; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s9 ; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s10 -; WORKAROUND-TRUE16-SDAG: global_load_d16_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1] -; WORKAROUND-TRUE16-SDAG: global_load_d16_u8 v{{[0-9]+}}, -; WORKAROUND-TRUE16-SDAG: global_load_d16_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5] +; WORKAROUND-TRUE16-SDAG: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1] +; WORKAROUND-TRUE16-SDAG: global_load_u8 v{{[0-9]+}}, +; WORKAROUND-TRUE16-SDAG: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5] ; WORKAROUND-FAKE16: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1] ; WORKAROUND-FAKE16: global_load_u8 v{{[0-9]+}}, diff --git a/llvm/test/CodeGen/AMDGPU/global-extload-gfx11plus.ll b/llvm/test/CodeGen/AMDGPU/global-extload-gfx11plus.ll index f92ba7a8978b9..e36ee94ad7cd8 100644 
--- a/llvm/test/CodeGen/AMDGPU/global-extload-gfx11plus.ll +++ b/llvm/test/CodeGen/AMDGPU/global-extload-gfx11plus.ll @@ -8,7 +8,7 @@ define amdgpu_kernel void @zextload_global_i8_to_i16(ptr addrspace(1) %out, ptr ; GFX11-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-REAL16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-REAL16-NEXT: global_load_d16_u8 v0, v1, s[2:3] +; GFX11-REAL16-NEXT: global_load_u8 v0, v1, s[2:3] ; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) ; GFX11-REAL16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-REAL16-NEXT: s_endpgm @@ -34,7 +34,7 @@ define amdgpu_kernel void @sextload_global_i8_to_i16(ptr addrspace(1) %out, ptr ; GFX11-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-REAL16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-REAL16-NEXT: global_load_d16_i8 v0, v1, s[2:3] +; GFX11-REAL16-NEXT: global_load_i8 v0, v1, s[2:3] ; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) ; GFX11-REAL16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-REAL16-NEXT: s_endpgm @@ -55,27 +55,16 @@ define amdgpu_kernel void @sextload_global_i8_to_i16(ptr addrspace(1) %out, ptr } define amdgpu_kernel void @zextload_global_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { -; GFX11-REAL16-LABEL: zextload_global_i8_to_i64: -; GFX11-REAL16: ; %bb.0: -; GFX11-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-REAL16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-REAL16-NEXT: global_load_d16_u8 v0, v1, s[2:3] -; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-REAL16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-REAL16-NEXT: global_store_b64 v1, v[0:1], s[0:1] -; GFX11-REAL16-NEXT: s_endpgm -; -; GFX11-FAKE16-LABEL: zextload_global_i8_to_i64: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FAKE16-NEXT: global_load_u8 v0, v1, s[2:3] -; 
GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: global_store_b64 v1, v[0:1], s[0:1] -; GFX11-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: zextload_global_i8_to_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v1, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: global_store_b64 v1, v[0:1], s[0:1] +; GFX11-NEXT: s_endpgm %a = load i8, ptr addrspace(1) %in %ext = zext i8 %a to i64 store i64 %ext, ptr addrspace(1) %out @@ -83,31 +72,18 @@ define amdgpu_kernel void @zextload_global_i8_to_i64(ptr addrspace(1) %out, ptr } define amdgpu_kernel void @sextload_global_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { -; GFX11-REAL16-LABEL: sextload_global_i8_to_i64: -; GFX11-REAL16: ; %bb.0: -; GFX11-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-REAL16-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-REAL16-NEXT: global_load_d16_i8 v0, v2, s[2:3] -; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-REAL16-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX11-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-REAL16-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX11-REAL16-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX11-REAL16-NEXT: s_endpgm -; -; GFX11-FAKE16-LABEL: sextload_global_i8_to_i64: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FAKE16-NEXT: global_load_i8 v0, v2, s[2:3] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX11-FAKE16-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX11-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: 
sextload_global_i8_to_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_i8 v0, v2, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_endpgm %a = load i8, ptr addrspace(1) %in %ext = sext i8 %a to i64 store i64 %ext, ptr addrspace(1) %out @@ -147,27 +123,16 @@ define amdgpu_kernel void @sextload_global_i16_to_i32(ptr addrspace(1) %out, ptr } define amdgpu_kernel void @zextload_global_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { -; GFX11-REAL16-LABEL: zextload_global_i16_to_i64: -; GFX11-REAL16: ; %bb.0: -; GFX11-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-REAL16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-REAL16-NEXT: global_load_d16_b16 v0, v1, s[2:3] -; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-REAL16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-REAL16-NEXT: global_store_b64 v1, v[0:1], s[0:1] -; GFX11-REAL16-NEXT: s_endpgm -; -; GFX11-FAKE16-LABEL: zextload_global_i16_to_i64: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FAKE16-NEXT: global_load_u16 v0, v1, s[2:3] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: global_store_b64 v1, v[0:1], s[0:1] -; GFX11-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: zextload_global_i16_to_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v0, v1, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: 
global_store_b64 v1, v[0:1], s[0:1] +; GFX11-NEXT: s_endpgm %a = load i16, ptr addrspace(1) %in %ext = zext i16 %a to i64 store i64 %ext, ptr addrspace(1) %out @@ -175,31 +140,18 @@ define amdgpu_kernel void @zextload_global_i16_to_i64(ptr addrspace(1) %out, ptr } define amdgpu_kernel void @sextload_global_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { -; GFX11-REAL16-LABEL: sextload_global_i16_to_i64: -; GFX11-REAL16: ; %bb.0: -; GFX11-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-REAL16-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-REAL16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-REAL16-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX11-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-REAL16-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX11-REAL16-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX11-REAL16-NEXT: s_endpgm -; -; GFX11-FAKE16-LABEL: sextload_global_i16_to_i64: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FAKE16-NEXT: global_load_u16 v0, v2, s[2:3] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX11-FAKE16-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX11-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: sextload_global_i16_to_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v0, v2, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_endpgm %a = load i16, ptr addrspace(1) %in %ext = sext 
i16 %a to i64 store i64 %ext, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll index 1602e31d6147c..329e1c3831525 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll @@ -5,7 +5,8 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -mattr=+real-true16 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -mattr=-real-true16 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -mattr=+real-true16 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s +; FIXME-TRUE16 enable gisel +; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -mattr=+real-true16 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -mattr=-real-true16 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s ; Test using saddr addressing mode of global_*load_* flat instructions. 
@@ -2290,17 +2291,11 @@ define amdgpu_ps half @global_load_saddr_i16(ptr addrspace(1) inreg %sbase, i32 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog ; -; GFX11-TRUE16-LABEL: global_load_saddr_i16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: ; return to shader part epilog -; -; GFX11-FAKE16-LABEL: global_load_saddr_i16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: ; return to shader part epilog +; GFX11-LABEL: global_load_saddr_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog ; ; GFX12-SDAG-TRUE16-LABEL: global_load_saddr_i16: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2333,17 +2328,11 @@ define amdgpu_ps half @global_load_saddr_i16_immneg128(ptr addrspace(1) inreg %s ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog ; -; GFX11-TRUE16-LABEL: global_load_saddr_i16_immneg128: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] offset:-128 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: ; return to shader part epilog -; -; GFX11-FAKE16-LABEL: global_load_saddr_i16_immneg128: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: ; return to shader part epilog +; GFX11-LABEL: global_load_saddr_i16_immneg128: +; GFX11: ; %bb.0: +; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog ; ; GFX12-SDAG-TRUE16-LABEL: global_load_saddr_i16_immneg128: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2377,17 +2366,11 @@ define amdgpu_ps half @global_load_saddr_f16(ptr addrspace(1) inreg %sbase, i32 ; GCN-NEXT: s_waitcnt vmcnt(0) ; 
GCN-NEXT: ; return to shader part epilog ; -; GFX11-TRUE16-LABEL: global_load_saddr_f16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: ; return to shader part epilog -; -; GFX11-FAKE16-LABEL: global_load_saddr_f16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: ; return to shader part epilog +; GFX11-LABEL: global_load_saddr_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog ; ; GFX12-SDAG-TRUE16-LABEL: global_load_saddr_f16: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2419,17 +2402,11 @@ define amdgpu_ps half @global_load_saddr_f16_immneg128(ptr addrspace(1) inreg %s ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog ; -; GFX11-TRUE16-LABEL: global_load_saddr_f16_immneg128: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] offset:-128 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: ; return to shader part epilog -; -; GFX11-FAKE16-LABEL: global_load_saddr_f16_immneg128: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: ; return to shader part epilog +; GFX11-LABEL: global_load_saddr_f16_immneg128: +; GFX11: ; %bb.0: +; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog ; ; GFX12-SDAG-TRUE16-LABEL: global_load_saddr_f16_immneg128: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3919,17 +3896,16 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16lo_undef_hi(ptr addrspace( ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: global_load_saddr_i16_d16lo_undef_hi: +; GFX12-GISEL: ; 
%bb.0: +; GFX12-GISEL-NEXT: global_load_u16 v0, v0, s[2:3] +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: global_load_saddr_i16_d16lo_undef_hi: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] ; GFX12-GISEL-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16lo_undef_hi: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %load = load i16, ptr addrspace(1) %gep0 @@ -3957,17 +3933,16 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16lo_undef_hi_immneg128(ptr ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: global_load_saddr_i16_d16lo_undef_hi_immneg128: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: global_load_saddr_i16_d16lo_undef_hi_immneg128: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] offset:-128 ; GFX12-GISEL-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16lo_undef_hi_immneg128: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr 
inbounds i8, ptr addrspace(1) %gep0, i64 -128 @@ -4002,6 +3977,12 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16lo_zero_hi(ptr addrspace(1 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: global_load_saddr_i16_d16lo_zero_hi: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: global_load_u16 v0, v0, s[2:3] +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: global_load_saddr_i16_d16lo_zero_hi: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] @@ -4010,13 +3991,6 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16lo_zero_hi(ptr addrspace(1 ; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, 0, 16, v0 ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16lo_zero_hi: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %load = load i16, ptr addrspace(1) %gep0 @@ -4050,6 +4024,12 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16lo_zero_hi_immneg128(ptr a ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: global_load_saddr_i16_d16lo_zero_hi_immneg128: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: 
global_load_saddr_i16_d16lo_zero_hi_immneg128: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] offset:-128 @@ -4058,13 +4038,6 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16lo_zero_hi_immneg128(ptr a ; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, 0, 16, v0 ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16lo_zero_hi_immneg128: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 @@ -4324,18 +4297,17 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16hi_undef_hi(ptr addrspace( ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: global_load_saddr_i16_d16hi_undef_hi: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: global_load_u16 v0, v0, s[2:3] +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: global_load_saddr_i16_d16hi_undef_hi: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: global_load_d16_hi_b16 v0, v0, s[2:3] ; GFX12-GISEL-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16hi_undef_hi: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX12-GISEL-FAKE16-NEXT: ; return 
to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %load = load i16, ptr addrspace(1) %gep0 @@ -4363,18 +4335,17 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16hi_undef_hi_immneg128(ptr ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: global_load_saddr_i16_d16hi_undef_hi_immneg128: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: global_load_saddr_i16_d16hi_undef_hi_immneg128: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: global_load_d16_hi_b16 v0, v0, s[2:3] offset:-128 ; GFX12-GISEL-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16hi_undef_hi_immneg128: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 @@ -4409,19 +4380,18 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16hi_zero_hi(ptr addrspace(1 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: global_load_saddr_i16_d16hi_zero_hi: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: global_load_u16 v0, v0, s[2:3] +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: 
global_load_saddr_i16_d16hi_zero_hi: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] ; GFX12-GISEL-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, 0 ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16hi_zero_hi: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %load = load i16, ptr addrspace(1) %gep0 @@ -4455,19 +4425,18 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16hi_zero_hi_immneg128(ptr a ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; +; GFX12-GISEL-LABEL: global_load_saddr_i16_d16hi_zero_hi_immneg128: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 +; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX12-GISEL-NEXT: ; return to shader part epilog ; GFX12-GISEL-TRUE16-LABEL: global_load_saddr_i16_d16hi_zero_hi_immneg128: ; GFX12-GISEL-TRUE16: ; %bb.0: ; GFX12-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] offset:-128 ; GFX12-GISEL-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, 0 ; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16hi_zero_hi_immneg128: -; GFX12-GISEL-FAKE16: ; %bb.0: -; GFX12-GISEL-FAKE16-NEXT: global_load_u16 v0, v0, s[2:3] offset:-128 -; GFX12-GISEL-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = 
getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 @@ -5039,3 +5008,7 @@ bb3: ; preds = %bb3, %bb !0 = !{i32 0, i32 1073741824} ; (1 << 30) !1 = !{i32 0, i32 1073741825} ; (1 << 30) + 1 +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11-FAKE16: {{.*}} +; GFX11-TRUE16: {{.*}} +; GFX12-GISEL-FAKE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll index da132d0269e6b..cc653a5b4bd97 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll @@ -7451,7 +7451,7 @@ define amdgpu_kernel void @atomic_load_i8_offset(ptr addrspace(1) %in, ptr addrs ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v1, s[0:1] offset:16 glc +; GFX11-TRUE16-NEXT: global_load_u8 v0, v1, s[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -7529,7 +7529,7 @@ define amdgpu_kernel void @atomic_load_i8_negoffset(ptr addrspace(1) %in, ptr ad ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v1, s[0:1] offset:-512 glc +; GFX11-TRUE16-NEXT: global_load_u8 v0, v1, s[0:1] offset:-512 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -7722,7 +7722,7 @@ define amdgpu_kernel void @atomic_load_i16_offset(ptr addrspace(1) %in, ptr addr ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] offset:16 glc +; 
GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -7800,7 +7800,7 @@ define amdgpu_kernel void @atomic_load_i16_negoffset(ptr addrspace(1) %in, ptr a ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] offset:-512 glc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] offset:-512 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -9187,7 +9187,7 @@ define amdgpu_kernel void @atomic_load_f16_offset(ptr addrspace(1) %in, ptr addr ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] offset:16 glc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -9264,7 +9264,7 @@ define amdgpu_kernel void @atomic_load_f16_negoffset(ptr addrspace(1) %in, ptr a ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] offset:-512 glc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] offset:-512 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -9337,7 +9337,7 @@ define amdgpu_kernel void @atomic_load_bf16_offset(ptr addrspace(1) %in, ptr add ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] offset:16 glc +; GFX11-TRUE16-NEXT: 
global_load_u16 v0, v1, s[0:1] offset:16 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -9414,7 +9414,7 @@ define amdgpu_kernel void @atomic_load_bf16_negoffset(ptr addrspace(1) %in, ptr ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] offset:-512 glc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] offset:-512 glc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv @@ -9552,6 +9552,47 @@ define amdgpu_kernel void @atomic_sub_i16_soffset__amdgpu_no_remote_memory(ptr a ; GFX9-NEXT: s_cbranch_execnz .LBB136_1 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_endpgm +; +; GFX11-LABEL: atomic_sub_i16_soffset__amdgpu_no_remote_memory: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s3, s0, 0x4650 +; GFX11-NEXT: s_addc_u32 s1, s1, 0 +; GFX11-NEXT: s_and_b32 s0, s3, -4 +; GFX11-NEXT: s_and_b32 s3, s3, 3 +; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 +; GFX11-NEXT: s_lshl_b32 s5, s3, 3 +; GFX11-NEXT: s_and_b32 s6, s2, 0xffff +; GFX11-NEXT: s_lshl_b32 s2, 0xffff, s5 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_not_b32 s3, s2 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v1, s4 +; GFX11-NEXT: s_lshl_b32 s4, s6, s5 +; GFX11-NEXT: s_mov_b32 s5, 0 +; GFX11-NEXT: .LBB136_1: ; %atomicrmw.start +; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_subrev_nc_u32_e32 v0, s4, v1 +; GFX11-NEXT: v_and_b32_e32 v0, s2, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: 
v_and_or_b32 v0, v1, s3, v0 +; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] glc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1 +; GFX11-NEXT: v_mov_b32_e32 v1, v0 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 +; GFX11-NEXT: s_cbranch_execnz .LBB136_1 +; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX11-NEXT: s_endpgm %gep = getelementptr i16, ptr addrspace(1) %out, i64 9000 %val = atomicrmw sub ptr addrspace(1) %gep, i16 %in syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 ret void @@ -9671,6 +9712,47 @@ define amdgpu_kernel void @atomic_sub_i8_soffset__amdgpu_no_remote_memory(ptr ad ; GFX9-NEXT: s_cbranch_execnz .LBB137_1 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_endpgm +; +; GFX11-LABEL: atomic_sub_i8_soffset__amdgpu_no_remote_memory: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s3, s0, 0x2328 +; GFX11-NEXT: s_addc_u32 s1, s1, 0 +; GFX11-NEXT: s_and_b32 s0, s3, -4 +; GFX11-NEXT: s_and_b32 s3, s3, 3 +; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 +; GFX11-NEXT: s_lshl_b32 s5, s3, 3 +; GFX11-NEXT: s_and_b32 s6, s2, 0xff +; GFX11-NEXT: s_lshl_b32 s2, 0xff, s5 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_not_b32 s3, s2 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v1, s4 +; GFX11-NEXT: s_lshl_b32 s4, s6, s5 +; GFX11-NEXT: s_mov_b32 s5, 0 +; GFX11-NEXT: .LBB137_1: ; %atomicrmw.start +; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_subrev_nc_u32_e32 v0, s4, v1 +; GFX11-NEXT: v_and_b32_e32 v0, s2, v0 +; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX11-NEXT: v_and_or_b32 v0, v1, s3, v0 +; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] glc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1 +; GFX11-NEXT: v_mov_b32_e32 v1, v0 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 +; GFX11-NEXT: s_cbranch_execnz .LBB137_1 +; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX11-NEXT: s_endpgm %gep = getelementptr i8, ptr addrspace(1) %out, i64 9000 %val = atomicrmw sub ptr addrspace(1) %gep, i8 %in syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll index 8e427a6ef2023..1bb4fb30465f3 100644 --- a/llvm/test/CodeGen/AMDGPU/half.ll +++ b/llvm/test/CodeGen/AMDGPU/half.ll @@ -967,7 +967,7 @@ define amdgpu_kernel void @global_load_store_f16(ptr addrspace(1) %out, ptr addr ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm @@ -1100,12 +1100,12 @@ define amdgpu_kernel void @global_extload_f16_to_f32(ptr addrspace(1) %out, ptr ; GFX11-TRUE16-LABEL: global_extload_f16_to_f32: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l -; GFX11-TRUE16-NEXT: global_store_b32 v1, 
v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v1.l +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: global_extload_f16_to_f32: @@ -1694,7 +1694,7 @@ define amdgpu_kernel void @global_extload_f16_to_f64(ptr addrspace(1) %out, ptr ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -3512,7 +3512,7 @@ define amdgpu_kernel void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addr ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[2:3] ; GFX11-TRUE16-NEXT: s_endpgm @@ -3554,7 +3554,7 @@ define amdgpu_kernel void @test_bitcast_to_half(ptr addrspace(1) %out, ptr addrs ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll index fc4cdcda99ae4..f7d90cbf45bcb 100644 --- a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll @@ -77,18 +77,18 @@ define amdgpu_kernel void @i16_eq(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr ; GFX11-TRUE16-NEXT: s_clause 0x1 ; 
GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -174,18 +174,18 @@ define amdgpu_kernel void @i16_ne(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, 
v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_ne_u16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -271,18 +271,18 @@ define amdgpu_kernel void @i16_ugt(ptr addrspace(1) %out, ptr addrspace(1) %a.pt ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_gt_u16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_gt_u16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -368,18 +368,18 @@ define amdgpu_kernel void @i16_uge(ptr 
addrspace(1) %out, ptr addrspace(1) %a.pt ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ge_u16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_ge_u16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -465,18 +465,18 @@ define amdgpu_kernel void @i16_ult(ptr addrspace(1) %out, ptr addrspace(1) %a.pt ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: 
global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_lt_u16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_lt_u16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -562,18 +562,18 @@ define amdgpu_kernel void @i16_ule(ptr addrspace(1) %out, ptr addrspace(1) %a.pt ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_le_u16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_le_u16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 
@llvm.amdgcn.workitem.id.x() @@ -660,18 +660,18 @@ define amdgpu_kernel void @i16_sgt(ptr addrspace(1) %out, ptr addrspace(1) %a.pt ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -757,18 +757,18 @@ define amdgpu_kernel void @i16_sge(ptr addrspace(1) %out, ptr addrspace(1) %a.pt ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; 
GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ge_i16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_ge_i16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -854,18 +854,18 @@ define amdgpu_kernel void @i16_slt(ptr addrspace(1) %out, ptr addrspace(1) %a.pt ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_lt_i16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_lt_i16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; 
GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -951,18 +951,18 @@ define amdgpu_kernel void @i16_sle(ptr addrspace(1) %out, ptr addrspace(1) %a.pt ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v2, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_le_i16_e32 vcc_lo, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_le_i16_e32 vcc_lo, v2.l, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1039,17 +1039,17 @@ define amdgpu_kernel void @i16_eq_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a ; GFX11-TRUE16-LABEL: i16_eq_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: 
v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, s4, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1123,17 +1123,17 @@ define amdgpu_kernel void @i16_ne_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a ; GFX11-TRUE16-LABEL: i16_ne_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_ne_u16_e32 vcc_lo, s4, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1207,17 +1207,17 
@@ define amdgpu_kernel void @i16_ugt_v_s(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-LABEL: i16_ugt_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_lt_u16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_lt_u16_e32 vcc_lo, s4, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1291,17 +1291,17 @@ define amdgpu_kernel void @i16_uge_v_s(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-LABEL: i16_uge_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, 
s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_le_u16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_le_u16_e32 vcc_lo, s4, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1375,17 +1375,17 @@ define amdgpu_kernel void @i16_ult_v_s(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-LABEL: i16_ult_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_gt_u16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_gt_u16_e32 vcc_lo, s4, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1459,17 +1459,17 @@ define amdgpu_kernel void @i16_ule_v_s(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-LABEL: i16_ule_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ge_u16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_ge_u16_e32 vcc_lo, s4, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1543,17 +1543,17 @@ define amdgpu_kernel void @i16_sgt_v_s(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-LABEL: i16_sgt_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_lt_i16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_lt_i16_e32 vcc_lo, s4, 
v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1627,17 +1627,17 @@ define amdgpu_kernel void @i16_sge_v_s(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-LABEL: i16_sge_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_le_i16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_le_i16_e32 vcc_lo, s4, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1711,17 +1711,17 @@ define amdgpu_kernel void @i16_slt_v_s(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-LABEL: i16_slt_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: 
v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_gt_i16_e32 vcc_lo, s4, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1795,17 +1795,17 @@ define amdgpu_kernel void @i16_sle_v_s(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-TRUE16-LABEL: i16_sle_v_s: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_load_b32 s4, s[4:5], 0x34 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ge_i16_e32 vcc_lo, s4, v0.l -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo -; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_cmp_ge_i16_e32 vcc_lo, s4, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll 
b/llvm/test/CodeGen/AMDGPU/idot4s.ll index ab38bd21994ec..46e803245433a 100644 --- a/llvm/test/CodeGen/AMDGPU/idot4s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll @@ -1159,42 +1159,43 @@ define amdgpu_kernel void @idot4_acc16_vecMul(ptr addrspace(1) %src1, ; GFX11-DL-TRUE16-LABEL: idot4_acc16_vecMul: ; GFX11-DL-TRUE16: ; %bb.0: ; %entry ; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DL-TRUE16-NEXT: s_clause 0x1 ; GFX11-DL-TRUE16-NEXT: global_load_b32 v1, v0, s[0:1] -; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[2:3] -; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v3, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v0, v0, s[2:3] +; GFX11-DL-TRUE16-NEXT: global_load_u16 v3, v2, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2) ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v2, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v0, 0, 8 ; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v4.h, 8, v1.l ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h -; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v7.h, 8, v2.l -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h +; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v7.h, 8, v0.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 ; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v1.h, 8, v1.h -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v2, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v2.h, 8, v2.h +; 
GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v0, 0, 8 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) ; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v4, v4, v7 +; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v7.h, 8, v0.h ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v6.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v4.l, v0.l +; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v4.l, v3.l ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v2 +; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v7 ; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v4.h ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l ; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.h -; GFX11-DL-TRUE16-NEXT: global_store_b16 v3, v0, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_store_b16 v2, v0, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_endpgm ; ; GFX11-DL-FAKE16-LABEL: idot4_acc16_vecMul: diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll index 305461ed6b208..22060a2d63749 100644 --- a/llvm/test/CodeGen/AMDGPU/idot4u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll @@ -1669,37 +1669,35 @@ define amdgpu_kernel void @notdot4_mixedtypes(ptr addrspace(1) %src1, ; GFX11-DL-TRUE16-LABEL: notdot4_mixedtypes: ; GFX11-DL-TRUE16: ; %bb.0: ; %entry ; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v6, 0 -; GFX11-DL-TRUE16-NEXT: 
s_delay_alu instid0(VALU_DEP_2) +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DL-TRUE16-NEXT: s_clause 0x1 -; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[0:1] -; GFX11-DL-TRUE16-NEXT: global_load_b32 v5, v0, s[2:3] -; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v6, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[0:1] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[2:3] +; GFX11-DL-TRUE16-NEXT: global_load_u16 v6, v5, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v4 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 8, v3 ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v5 -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v4 +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v4, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_perm_b32 v4, v4, v4, 0xc0c0302 +; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v6.l ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0 -; GFX11-DL-TRUE16-NEXT: v_perm_b32 
v1, v5, v5, 0xc0c0302 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v3.l, v0.l -; GFX11-DL-TRUE16-NEXT: v_perm_b32 v2, v4, v4, 0xc0c0302 -; GFX11-DL-TRUE16-NEXT: v_dot4_u32_u8 v0, v2, v1, v0 -; GFX11-DL-TRUE16-NEXT: global_store_b16 v6, v0, s[4:5] +; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v2.l, v0.l +; GFX11-DL-TRUE16-NEXT: v_perm_b32 v1, v3, v3, 0xc0c0302 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_dot4_u32_u8 v0, v1, v4, v0 +; GFX11-DL-TRUE16-NEXT: global_store_b16 v5, v0, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_endpgm ; ; GFX11-DL-FAKE16-LABEL: notdot4_mixedtypes: @@ -1969,32 +1967,34 @@ define amdgpu_kernel void @notdot4_mixedtypes2(ptr addrspace(1) %src1, ; GFX11-DL-TRUE16-NEXT: s_clause 0x1 ; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[2:3] ; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[0:1] -; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v5, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_load_u16 v6, v5, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 8, v3 ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4 -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v4, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.h -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l -; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v3.l -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v4 +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v4, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.h +; GFX11-DL-TRUE16-NEXT: 
v_lshrrev_b32_e32 v4, 24, v4 +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v0, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v1.l +; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v3.l +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l -; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v3.h -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v4 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v1.h, v0.l +; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, v6.l +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v3.h +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v0.h, v0.l ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.h, v0.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v3.l, v1.l, v0.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v4.l, v1.l, v0.l ; GFX11-DL-TRUE16-NEXT: global_store_b16 v5, v0, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_endpgm ; @@ -2438,38 +2438,39 @@ define amdgpu_kernel void @udot4_acc16_vecMul(ptr addrspace(1) %src1, ; GFX11-DL-TRUE16-LABEL: 
udot4_acc16_vecMul: ; GFX11-DL-TRUE16: ; %bb.0: ; %entry ; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DL-TRUE16-NEXT: s_clause 0x1 ; GFX11-DL-TRUE16-NEXT: global_load_b32 v1, v0, s[0:1] -; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[2:3] -; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v3, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v0, v0, s[2:3] +; GFX11-DL-TRUE16-NEXT: global_load_u16 v3, v2, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2) ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b16 v4.h, 8, v1.l ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b16 v5.h, 8, v2.l +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b16 v5.h, 8, v0.l ; GFX11-DL-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v1.l -; GFX11-DL-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v2.l -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v2 +; GFX11-DL-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v0.l +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v0 ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.h -; GFX11-DL-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.h +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v4, v4, v5 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l +; GFX11-DL-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v0.h +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.h, v6.l ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.h, v7.l ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v4.l, v0.l -; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v2 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v4.l, v3.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v5 ; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v4.h +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.h -; GFX11-DL-TRUE16-NEXT: global_store_b16 v3, v0, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_store_b16 v2, v0, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_endpgm ; ; GFX11-DL-FAKE16-LABEL: udot4_acc16_vecMul: @@ -2713,44 +2714,46 @@ define amdgpu_kernel void @udot4_acc8_vecMul(ptr addrspace(1) %src1, ; GFX11-DL-TRUE16-LABEL: udot4_acc8_vecMul: ; GFX11-DL-TRUE16: ; %bb.0: ; %entry ; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DL-TRUE16-NEXT: s_clause 0x1 -; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[0:1] -; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[2:3] -; GFX11-DL-TRUE16-NEXT: global_load_d16_u8 v0, v5, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[0:1] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[2:3] +; GFX11-DL-TRUE16-NEXT: global_load_u8 v5, v4, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_waitcnt 
vmcnt(2) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v2 ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v4 -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b16 v0.h, 8, v3.l -; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v3.h, v4.h -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b16 v1.h, 8, v4.l -; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v3.l, v4.l, v0.l -; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v2.l, v2.l, v6.l +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 24, v3 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b16 v0.l, 8, v2.l +; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v0.h, v2.h, v3.h +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b16 v1.l, 8, v3.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v6.l, v7.l ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, 0 -; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l -; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v0.h, v0.h, v1.h +; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v0.h ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-DL-TRUE16-NEXT: v_lshlrev_b16 v2.l, 8, v2.l +; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l +; GFX11-DL-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.h +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.h, v6.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-DL-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.h -; GFX11-DL-TRUE16-NEXT: v_or_b16 v6.h, v1.l, v2.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-DL-TRUE16-NEXT: v_or_b32_e32 v1, v7, v6 +; GFX11-DL-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | 
instid1(VALU_DEP_2) +; GFX11-DL-TRUE16-NEXT: v_or_b16 v6.h, v0.h, v1.l ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2 -; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v3.h, v4.h, v0.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_or_b32_e32 v0, v7, v6 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v0 +; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v3.l, v5.l +; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v6.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.h, v3.h, v0.l ; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l -; GFX11-DL-TRUE16-NEXT: global_store_b8 v5, v0, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_store_b8 v4, v0, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_endpgm ; ; GFX11-DL-FAKE16-LABEL: udot4_acc8_vecMul: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll index 4419b8c6f9862..57db2c94ce908 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll @@ -13,7 +13,7 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_bf16_bf16( ; SDAG-GFX11-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 ; SDAG-GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[6:7] +; SDAG-GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[6:7] ; SDAG-GFX11-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0 ; SDAG-GFX11-TRUE16-NEXT: s_load_b32 s3, s[4:5], 0x0 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) @@ -51,14 +51,14 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_bf16_bf16_dpp( ; SDAG-GFX11-TRUE16: ; %bb.0: ; %entry ; SDAG-GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s1 -; SDAG-GFX11-TRUE16-NEXT: scratch_load_b32 v2, off, s2 -; SDAG-GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s3 +; SDAG-GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s1 +; SDAG-GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s2 +; SDAG-GFX11-TRUE16-NEXT: scratch_load_u16 v2, off, s3 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) -; SDAG-GFX11-TRUE16-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; SDAG-GFX11-TRUE16-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; SDAG-GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; SDAG-GFX11-TRUE16-NEXT: v_dot2_bf16_bf16 v0.l, v1, v2, v0.l +; SDAG-GFX11-TRUE16-NEXT: v_dot2_bf16_bf16 v0.l, v0, v1, v2.l ; SDAG-GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, s0 ; SDAG-GFX11-TRUE16-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll index 0194d25a99cdc..61941e29495cc 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s --check-prefixes=GFX11,SDAG-GFX11,SDAG-GFX11-TRUE16 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck %s --check-prefixes=GFX11,SDAG-GFX11,SDAG-GFX11-FAKE16 -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s --check-prefixes=GFX11,GISEL-GFX11,GISEL-GFX11-TRUE16 +; FIXME-TRUE16 enable gisel +; XUN: llc 
-global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s --check-prefixes=GFX11,GISEL-GFX11,GISEL-GFX11-TRUE16 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck %s --check-prefixes=GFX11,GISEL-GFX11,GISEL-GFX11-FAKE16 declare half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %a, <2 x half> %b, half %c) @@ -12,7 +13,7 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16( ; SDAG-GFX11-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 ; SDAG-GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[6:7] +; SDAG-GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[6:7] ; SDAG-GFX11-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0 ; SDAG-GFX11-TRUE16-NEXT: s_load_b32 s3, s[4:5], 0x0 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -33,6 +34,18 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16( ; SDAG-GFX11-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] ; SDAG-GFX11-FAKE16-NEXT: s_endpgm ; +; GISEL-GFX11-LABEL: test_llvm_amdgcn_fdot2_f16_f16: +; GISEL-GFX11: ; %bb.0: ; %entry +; GISEL-GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: global_load_u16 v1, v0, s[6:7] +; GISEL-GFX11-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GISEL-GFX11-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_dot2_f16_f16 v1, s2, s3, v1 +; GISEL-GFX11-NEXT: global_store_b16 v0, v1, s[0:1] +; GISEL-GFX11-NEXT: s_endpgm ; GISEL-GFX11-TRUE16-LABEL: test_llvm_amdgcn_fdot2_f16_f16: ; GISEL-GFX11-TRUE16: ; %bb.0: ; %entry ; GISEL-GFX11-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 @@ -45,19 +58,6 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16( ; GISEL-GFX11-TRUE16-NEXT: v_dot2_f16_f16 v0.l, s2, s3, v0.l ; GISEL-GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GISEL-GFX11-TRUE16-NEXT: s_endpgm 
-; -; GISEL-GFX11-FAKE16-LABEL: test_llvm_amdgcn_fdot2_f16_f16: -; GISEL-GFX11-FAKE16: ; %bb.0: ; %entry -; GISEL-GFX11-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GISEL-GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-FAKE16-NEXT: global_load_u16 v1, v0, s[6:7] -; GISEL-GFX11-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 -; GISEL-GFX11-FAKE16-NEXT: s_load_b32 s3, s[4:5], 0x0 -; GISEL-GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GISEL-GFX11-FAKE16-NEXT: v_dot2_f16_f16 v1, s2, s3, v1 -; GISEL-GFX11-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] -; GISEL-GFX11-FAKE16-NEXT: s_endpgm ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b, @@ -76,14 +76,14 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16_dpp( ; SDAG-GFX11-TRUE16: ; %bb.0: ; %entry ; SDAG-GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s1 -; SDAG-GFX11-TRUE16-NEXT: scratch_load_b32 v2, off, s2 -; SDAG-GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s3 +; SDAG-GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s1 +; SDAG-GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s2 +; SDAG-GFX11-TRUE16-NEXT: scratch_load_u16 v2, off, s3 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) -; SDAG-GFX11-TRUE16-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; SDAG-GFX11-TRUE16-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; SDAG-GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; SDAG-GFX11-TRUE16-NEXT: v_dot2_f16_f16 v0.l, v1, v2, v0.l +; SDAG-GFX11-TRUE16-NEXT: v_dot2_f16_f16 v0.l, v0, v1, v2.l ; SDAG-GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, s0 ; SDAG-GFX11-TRUE16-NEXT: s_endpgm ; @@ -99,6 +99,17 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16_dpp( ; SDAG-GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, s0 ; 
SDAG-GFX11-FAKE16-NEXT: s_endpgm ; +; GISEL-GFX11-LABEL: test_llvm_amdgcn_fdot2_f16_f16_dpp: +; GISEL-GFX11: ; %bb.0: ; %entry +; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: scratch_load_b32 v0, off, s1 +; GISEL-GFX11-NEXT: scratch_load_b32 v1, off, s2 +; GISEL-GFX11-NEXT: scratch_load_u16 v2, off, s3 +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) +; GISEL-GFX11-NEXT: v_dot2_f16_f16_e64_dpp v0, v0, v1, v2 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GISEL-GFX11-NEXT: scratch_store_b16 off, v0, s0 +; GISEL-GFX11-NEXT: s_endpgm ; GISEL-GFX11-TRUE16-LABEL: test_llvm_amdgcn_fdot2_f16_f16_dpp: ; GISEL-GFX11-TRUE16: ; %bb.0: ; %entry ; GISEL-GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -113,18 +124,6 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16_dpp( ; GISEL-GFX11-TRUE16-NEXT: v_dot2_f16_f16 v0.l, v1, v2, v0.l ; GISEL-GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, s0 ; GISEL-GFX11-TRUE16-NEXT: s_endpgm -; -; GISEL-GFX11-FAKE16-LABEL: test_llvm_amdgcn_fdot2_f16_f16_dpp: -; GISEL-GFX11-FAKE16: ; %bb.0: ; %entry -; GISEL-GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GISEL-GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s1 -; GISEL-GFX11-FAKE16-NEXT: scratch_load_b32 v1, off, s2 -; GISEL-GFX11-FAKE16-NEXT: scratch_load_u16 v2, off, s3 -; GISEL-GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GISEL-GFX11-FAKE16-NEXT: v_dot2_f16_f16_e64_dpp v0, v0, v1, v2 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GISEL-GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, s0 -; GISEL-GFX11-FAKE16-NEXT: s_endpgm ptr addrspace(5) %r, ptr addrspace(5) %a, ptr addrspace(5) %b, @@ -144,5 +143,5 @@ entry: declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; GFX11: {{.*}} -; GISEL-GFX11: {{.*}} +; GISEL-GFX11-FAKE16: {{.*}} ; SDAG-GFX11: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll index 7d63e22d84b72..47693767e7d6c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll @@ -76,7 +76,7 @@ define amdgpu_kernel void @cos_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) { ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0.15915494, v0.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll index ba03115c51536..86fcb29776240 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll @@ -76,7 +76,7 @@ define amdgpu_kernel void @sin_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) { ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0.15915494, v0.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/mad.u16.ll b/llvm/test/CodeGen/AMDGPU/mad.u16.ll index fbf8011fd40c9..3915ece69f366 100644 --- a/llvm/test/CodeGen/AMDGPU/mad.u16.ll +++ b/llvm/test/CodeGen/AMDGPU/mad.u16.ll @@ -69,18 +69,18 @@ define amdgpu_kernel void @mad_u16( ; GFX11-TRUE16-LABEL: mad_u16: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v1, 1, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v1, v1, s[6:7] glc dlc +; GFX11-TRUE16-NEXT: global_load_u16 v0, v0, s[6:7] glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l -; GFX11-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v2.l, v0.l +; GFX11-TRUE16-NEXT: global_store_b16 v3, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: mad_u16: diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll index 311527d5d04cc..39d73a293647a 100644 --- a/llvm/test/CodeGen/AMDGPU/min.ll +++ b/llvm/test/CodeGen/AMDGPU/min.ll @@ -1458,10 +1458,10 @@ define amdgpu_kernel void @v_test_imin_slt_i16(ptr addrspace(1) %out, ptr addrsp ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u16 v2, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v0.h +; GFX11-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v2.l ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; @@ -2747,10 +2747,10 
@@ define amdgpu_kernel void @v_test_umin_ult_i8(ptr addrspace(1) %out, ptr addrspa ; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v1, s[2:3] -; GFX11-TRUE16-NEXT: global_load_d16_hi_u8 v0, v1, s[4:5] +; GFX11-TRUE16-NEXT: global_load_u8 v0, v1, s[2:3] +; GFX11-TRUE16-NEXT: global_load_u8 v2, v1, s[4:5] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v0.h +; GFX11-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v2.l ; GFX11-TRUE16-NEXT: global_store_b8 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; @@ -3176,47 +3176,26 @@ define amdgpu_kernel void @v_test_umin_ult_i16_multi_use(ptr addrspace(1) %out0, ; GFX10-NEXT: global_store_byte v0, v2, s[2:3] ; GFX10-NEXT: s_endpgm ; -; GFX11-TRUE16-LABEL: v_test_umin_ult_i16_multi_use: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v1, v0, s[6:7] -; GFX11-TRUE16-NEXT: global_load_d16_b16 v2, v0, s[4:5] -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v1 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cmp_lt_u32_e32 vcc_lo, v4, v3 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo -; GFX11-TRUE16-NEXT: s_clause 0x1 -; GFX11-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1] -; GFX11-TRUE16-NEXT: global_store_b8 v0, v2, s[2:3] -; GFX11-TRUE16-NEXT: s_endpgm -; -; GFX11-FAKE16-LABEL: v_test_umin_ult_i16_multi_use: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x0 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 
-; GFX11-FAKE16-NEXT: s_clause 0x1 -; GFX11-FAKE16-NEXT: global_load_u16 v1, v0, s[6:7] -; GFX11-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0xffff, v1 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff, v2 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_cmp_lt_u32_e32 vcc_lo, v4, v3 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo -; GFX11-FAKE16-NEXT: s_clause 0x1 -; GFX11-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] -; GFX11-FAKE16-NEXT: global_store_b8 v0, v2, s[2:3] -; GFX11-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: v_test_umin_ult_i16_multi_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_load_u16 v1, v0, s[6:7] +; GFX11-NEXT: global_load_u16 v2, v0, s[4:5] +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v1 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_cmp_lt_u32_e32 vcc_lo, v4, v3 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] +; GFX11-NEXT: s_endpgm ; ; GFX1250-LABEL: v_test_umin_ult_i16_multi_use: ; GFX1250: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll index a3c38b17abf00..08a9ea985e459 100644 --- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll @@ -32,19 +32,12 @@ define i8 @flat_inst_valu_offset_1(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:1 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:1 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1 +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -68,13 +61,6 @@ define i8 @flat_inst_valu_offset_1(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:1 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_1: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -107,19 +93,12 @@ define i8 @flat_inst_valu_offset_11bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:2047 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: 
s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:2047 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -143,13 +122,6 @@ define i8 @flat_inst_valu_offset_11bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:2047 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_11bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -182,19 +154,12 @@ define i8 @flat_inst_valu_offset_12bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] 
offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -218,13 +183,6 @@ define i8 @flat_inst_valu_offset_12bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_12bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -259,25 +217,15 @@ define i8 @flat_inst_valu_offset_13bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: flat_inst_valu_offset_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -354,25 +302,15 @@ define i8 @flat_inst_valu_offset_24bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_24bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_24bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: 
s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: flat_inst_valu_offset_24bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_24bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -449,25 +387,15 @@ define i8 @flat_inst_valu_offset_neg_11bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_neg_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_neg_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 +; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_neg_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -491,16 +419,6 @@ define i8 @flat_inst_valu_offset_neg_11bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_neg_11bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -535,25 +453,15 @@ define i8 @flat_inst_valu_offset_neg_12bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_neg_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_neg_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; 
GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_neg_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_neg_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -577,16 +485,6 @@ define i8 @flat_inst_valu_offset_neg_12bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_neg_12bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -621,25 +519,15 @@ define i8 @flat_inst_valu_offset_neg_13bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_neg_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_neg_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_neg_13bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_neg_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -663,16 +551,6 @@ define i8 @flat_inst_valu_offset_neg_13bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_13bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_neg_13bit_max: ; GFX12-GISEL: ; 
%bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -707,25 +585,15 @@ define i8 @flat_inst_valu_offset_neg_24bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_neg_24bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_neg_24bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_neg_24bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_neg_24bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -749,16 +617,6 @@ define i8 @flat_inst_valu_offset_neg_24bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_neg_24bit_max: -; 
GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_neg_24bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -792,19 +650,12 @@ define i8 @flat_inst_valu_offset_2x_11bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_2x_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -828,13 +679,6 @@ define i8 @flat_inst_valu_offset_2x_11bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_2x_11bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -869,25 +713,15 @@ define i8 @flat_inst_valu_offset_2x_12bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_2x_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -964,25 +798,15 @@ define i8 @flat_inst_valu_offset_2x_13bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_2x_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1059,25 +883,15 @@ define i8 @flat_inst_valu_offset_2x_24bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_24bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4094 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_2x_24bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4094 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_24bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4094 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_24bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1163,25 +977,15 @@ define i8 @flat_inst_valu_offset_2x_neg_11bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: 
v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1205,16 +1009,6 @@ define i8 @flat_inst_valu_offset_2x_neg_11bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: 
s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1249,25 +1043,15 @@ define i8 @flat_inst_valu_offset_2x_neg_12bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1291,16 +1075,6 @@ define i8 @flat_inst_valu_offset_2x_neg_12bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: 
s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1335,25 +1109,15 @@ define i8 @flat_inst_valu_offset_2x_neg_13bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1377,16 +1141,6 @@ define i8 @flat_inst_valu_offset_2x_neg_13bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1421,25 +1175,15 @@ define i8 @flat_inst_valu_offset_2x_neg_24bit_max(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; 
GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1469,16 +1213,6 @@ define i8 @flat_inst_valu_offset_2x_neg_24bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1517,25 +1251,15 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split0(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_11bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; 
GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:2047 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_11bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:2047 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_11bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1622,25 +1346,15 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split1(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_11bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: 
v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:2048 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_11bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:2048 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_11bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1727,25 +1441,15 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_12bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] 
-; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_12bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_12bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1832,25 +1536,15 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_12bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_12bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 
0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_12bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1880,16 +1574,6 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1928,25 +1612,15 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_13bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; 
GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_13bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_13bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2033,25 +1707,15 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_13bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; 
GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_13bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_13bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2081,16 +1745,6 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2130,25 +1784,15 @@ define i8 
@flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2188,16 +1832,6 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) { ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: 
flat_inst_valu_offset_64bit_11bit_neg_high_split0: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2237,25 +1871,15 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2295,16 +1919,6 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) { ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2344,25 +1958,15 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: -; 
GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2402,16 +2006,6 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) { ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2451,25 +2045,15 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2509,16 +2093,6 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) { ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, 
vcc_lo, 0x1000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2558,25 +2132,15 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; 
GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2616,16 +2180,6 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) { ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2665,25 +2219,15 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; 
GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-NEXT: flat_load_u8 v0, v[0:1] +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2723,16 +2267,6 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) { ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2776,25 +2310,15 @@ define amdgpu_kernel void @flat_inst_salu_offset_1(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX11-SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:1 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:1 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: flat_inst_salu_offset_1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2816,16 +2340,6 @@ define amdgpu_kernel void @flat_inst_salu_offset_1(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 ; GFX12-SDAG-FAKE16-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: flat_inst_salu_offset_1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:1 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_endpgm -; ; GFX12-GISEL-LABEL: flat_inst_salu_offset_1: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -2866,25 +2380,15 @@ define amdgpu_kernel void 
@flat_inst_salu_offset_11bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:2047 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: flat_inst_salu_offset_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2906,16 +2410,6 @@ define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 ; GFX12-SDAG-FAKE16-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: flat_inst_salu_offset_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; 
GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_endpgm -; ; GFX12-GISEL-LABEL: flat_inst_salu_offset_11bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -2956,25 +2450,15 @@ define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: flat_inst_salu_offset_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2996,16 +2480,6 @@ define amdgpu_kernel void 
@flat_inst_salu_offset_12bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 ; GFX12-SDAG-FAKE16-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: flat_inst_salu_offset_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_endpgm -; ; GFX12-GISEL-LABEL: flat_inst_salu_offset_12bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3048,29 +2522,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; 
GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3160,29 +2622,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_neg_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0xfffff800, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_neg_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0xfffff800, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; 
GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_11bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff800, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_neg_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3272,29 +2722,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_neg_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_neg_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; 
GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_12bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_neg_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3384,29 +2822,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_neg_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_neg_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; 
GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_neg_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3494,25 +2920,15 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_2x_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-LABEL: flat_inst_salu_offset_2x_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_waitcnt 
lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3534,16 +2950,6 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(ptr %p) { ; GFX12-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 ; GFX12-SDAG-FAKE16-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_endpgm -; ; GFX12-GISEL-LABEL: flat_inst_salu_offset_2x_11bit_max: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3586,29 +2992,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_2x_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 
s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_12bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3698,29 +3092,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0x3000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_2x_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 
s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0x3000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x3000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3810,29 +3192,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3922,29 +3292,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4034,29 +3392,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4146,29 +3492,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_11bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:2047 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_11bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; 
GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_11bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4265,29 +3599,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_11bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:2048 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_11bit_split1: -; GFX11-SDAG-FAKE16: 
; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:2048 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_11bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4384,29 +3706,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_12bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_12bit_split0: -; 
GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_12bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4504,29 +3814,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_12bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: 
flat_inst_salu_offset_64bit_12bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_12bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4624,29 +3922,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_13bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; 
GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_13bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_13bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4744,29 +4030,17 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(ptr %p) { ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_13bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: 
s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_13bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_13bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4865,31 +4139,18 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(ptr ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; 
GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4990,31 +4251,18 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(ptr ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -5115,31 +4363,18 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(ptr ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 
s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -5240,31 +4475,18 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(ptr 
; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: 
s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -5365,31 +4587,18 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(ptr ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: 
v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -5490,31 +4699,18 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(ptr ; GFX10-NEXT: flat_store_byte v[0:1], v0 ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: flat_load_u8 v0, v[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -5588,10 +4784,10 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(ptr ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX10-GISEL: {{.*}} ; GFX10-SDAG: {{.*}} -; GFX11: {{.*}} ; GFX11-GISEL-FAKE16: {{.*}} ; GFX11-GISEL-TRUE16: {{.*}} -; GFX11-SDAG: {{.*}} +; GFX11-SDAG-FAKE16: {{.*}} +; GFX11-SDAG-TRUE16: {{.*}} ; GFX12: {{.*}} ; GFX12-GISEL-FAKE16: {{.*}} ; GFX12-GISEL-TRUE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll index 20916a9a51d9e..4f230b2231ef2 100644 --- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll @@ -30,12 +30,12 @@ define i8 @global_inst_valu_offset_1(ptr addrspace(1) %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off offset:1 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: 
global_inst_valu_offset_1: ; GFX12-GISEL: ; %bb.0: @@ -48,20 +48,6 @@ define i8 @global_inst_valu_offset_1(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:1 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:1 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_1: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -103,12 +89,12 @@ define i8 @global_inst_valu_offset_11bit_max(ptr addrspace(1) %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off offset:2047 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2047 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_11bit_max: ; GFX12-GISEL: ; %bb.0: @@ -121,20 +107,6 @@ define i8 @global_inst_valu_offset_11bit_max(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: 
global_inst_valu_offset_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:2047 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:2047 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -178,12 +150,12 @@ define i8 @global_inst_valu_offset_12bit_max(ptr addrspace(1) %p) { ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_12bit_max: ; GFX12-GISEL: ; %bb.0: @@ -205,20 +177,6 @@ define i8 @global_inst_valu_offset_12bit_max(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 -; 
GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -303,25 +261,15 @@ define i8 @global_inst_valu_offset_13bit_max(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-SDAG-NEXT: 
s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -407,25 +355,15 @@ define i8 @global_inst_valu_offset_24bit_max(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_24bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_24bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_24bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 
s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_24bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -468,12 +406,12 @@ define i8 @global_inst_valu_offset_neg_11bit_max(ptr addrspace(1) %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_neg_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_neg_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_neg_11bit_max: ; GFX12-GISEL: ; %bb.0: @@ -486,20 +424,6 @@ define i8 @global_inst_valu_offset_neg_11bit_max(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_neg_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:-2048 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_neg_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_neg_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -543,12 +467,12 @@ define i8 @global_inst_valu_offset_neg_12bit_max(ptr addrspace(1) %p) { ; 
GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_neg_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_neg_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_neg_12bit_max: ; GFX12-GISEL: ; %bb.0: @@ -561,20 +485,6 @@ define i8 @global_inst_valu_offset_neg_12bit_max(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_neg_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:-4096 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_neg_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_neg_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -620,15 +530,15 @@ define i8 @global_inst_valu_offset_neg_13bit_max(ptr addrspace(1) %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_neg_13bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_neg_13bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: global_load_u8 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_neg_13bit_max: ; GFX12-GISEL: ; %bb.0: @@ -641,26 +551,6 @@ define i8 @global_inst_valu_offset_neg_13bit_max(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_neg_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_neg_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; 
GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_neg_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -706,15 +596,15 @@ define i8 @global_inst_valu_offset_neg_24bit_max(ptr addrspace(1) %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_neg_24bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_neg_24bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: global_load_u8 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_neg_24bit_max: ; GFX12-GISEL: ; %bb.0: @@ -727,26 +617,6 @@ define i8 @global_inst_valu_offset_neg_24bit_max(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_neg_24bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 
s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_neg_24bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_neg_24bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -790,12 +660,12 @@ define i8 @global_inst_valu_offset_2x_11bit_max(ptr addrspace(1) %p) { ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_2x_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_2x_11bit_max: ; GFX12-GISEL: ; %bb.0: @@ -817,20 +687,6 @@ define i8 @global_inst_valu_offset_2x_11bit_max(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; 
GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_2x_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -915,25 +771,15 @@ define i8 @global_inst_valu_offset_2x_12bit_max(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_2x_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, 
v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1019,25 +865,15 @@ define i8 @global_inst_valu_offset_2x_13bit_max(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_2x_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_13bit_max: 
; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1126,25 +962,15 @@ define i8 @global_inst_valu_offset_2x_24bit_max(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_24bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4094 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_2x_24bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4094 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_24bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4094 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_24bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1195,12 +1021,12 @@ define i8 @global_inst_valu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_2x_neg_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_2x_neg_11bit_max: ; GFX12-GISEL: ; %bb.0: @@ -1213,20 +1039,6 @@ define i8 @global_inst_valu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_neg_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:-4096 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_2x_neg_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_neg_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1272,15 +1084,15 @@ define i8 @global_inst_valu_offset_2x_neg_12bit_max(ptr addrspace(1) %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: 
v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_2x_neg_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: global_load_u8 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_2x_neg_12bit_max: ; GFX12-GISEL: ; %bb.0: @@ -1293,26 +1105,6 @@ define i8 @global_inst_valu_offset_2x_neg_12bit_max(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_neg_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_2x_neg_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: 
s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_neg_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1358,15 +1150,15 @@ define i8 @global_inst_valu_offset_2x_neg_13bit_max(ptr addrspace(1) %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_13bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_2x_neg_13bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-NEXT: global_load_u8 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_2x_neg_13bit_max: ; GFX12-GISEL: ; %bb.0: @@ -1379,26 +1171,6 @@ define i8 @global_inst_valu_offset_2x_neg_13bit_max(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_neg_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 
s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_2x_neg_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_neg_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -1486,25 +1258,15 @@ define i8 @global_inst_valu_offset_2x_neg_24bit_max(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_neg_24bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xff001000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:-4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_2x_neg_24bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0xff001000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:-4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_neg_24bit_max: +; 
GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff001000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_2x_neg_24bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1601,25 +1363,15 @@ define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_11bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:2047 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_11bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:2047 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; 
GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_11bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1706,25 +1458,15 @@ define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) { ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_11bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:2048 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_11bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:2048 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_11bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1820,25 +1562,15 @@ define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_12bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_12bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_12bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -1892,15 +1624,15 @@ define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) { ; 
GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-NEXT: global_load_u8 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1: ; GFX12-GISEL: ; %bb.0: @@ -1916,26 +1648,6 @@ define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_12bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_12bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: 
s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_12bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2030,25 +1742,15 @@ define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) { ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_13bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_13bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-SDAG-NEXT: 
global_load_u8 v0, v[0:1], off offset:4095 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_13bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2102,15 +1804,15 @@ define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo +; GFX11-NEXT: global_load_u8 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1: ; GFX12-GISEL: ; %bb.0: @@ -2126,26 +1828,6 @@ define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_13bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt 
vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_13bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_13bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2242,25 +1924,15 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:-2049 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:-2049 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: 
s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2049 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2349,25 +2021,15 @@ define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:-2048 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: +; GFX11-SDAG: ; %bb.0: +; 
GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2465,25 +2127,15 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:-1 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:-1 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; 
GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2538,15 +2190,15 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-NEXT: global_load_u8 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX12-GISEL: ; %bb.0: @@ -2572,26 +2224,6 @@ define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; 
GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2688,25 +2320,15 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:-1 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-SDAG-FAKE16-NEXT: 
s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:-1 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -2761,15 +2383,15 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo +; GFX11-NEXT: global_load_u8 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-LABEL: 
global_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX12-GISEL: ; %bb.0: @@ -2795,26 +2417,6 @@ define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 0x80000000, v1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 @@ -2868,15 +2470,15 @@ define amdgpu_kernel void @global_inst_salu_offset_1(ptr addrspace(1) %p) { ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:1 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; 
GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:1 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_1: ; GFX12-GISEL: ; %bb.0: @@ -2888,26 +2490,6 @@ define amdgpu_kernel void @global_inst_salu_offset_1(ptr addrspace(1) %p) { ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:1 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:1 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_1: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -2954,15 +2536,15 @@ define amdgpu_kernel void @global_inst_salu_offset_11bit_max(ptr addrspace(1) %p ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_11bit_max: -; GFX11-GISEL: ; 
%bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:2047 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:2047 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_11bit_max: ; GFX12-GISEL: ; %bb.0: @@ -2974,26 +2556,6 @@ define amdgpu_kernel void @global_inst_salu_offset_11bit_max(ptr addrspace(1) %p ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:2047 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:2047 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; 
GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3040,15 +2602,15 @@ define amdgpu_kernel void @global_inst_salu_offset_12bit_max(ptr addrspace(1) %p ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_12bit_max: ; GFX12-GISEL: ; %bb.0: @@ -3060,26 +2622,6 @@ define amdgpu_kernel void @global_inst_salu_offset_12bit_max(ptr addrspace(1) %p ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3126,15 +2668,15 @@ define amdgpu_kernel void @global_inst_salu_offset_13bit_max(ptr addrspace(1) %p ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_13bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x1000 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_13bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_13bit_max: ; GFX12-GISEL: ; %bb.0: @@ -3146,26 +2688,6 @@ define amdgpu_kernel void @global_inst_salu_offset_13bit_max(ptr addrspace(1) %p ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0x1000 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; 
GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0x1000 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3212,15 +2734,15 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(ptr addrspace(1 ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_neg_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:-2048 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_neg_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-2048 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_neg_11bit_max: ; GFX12-GISEL: ; %bb.0: @@ -3232,26 +2754,6 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(ptr addrspace(1 ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: 
global_inst_salu_offset_neg_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:-2048 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_neg_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:-2048 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_neg_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3301,15 +2803,15 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(ptr addrspace(1 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off ; GFX10-GISEL-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_neg_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_neg_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: 
global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_neg_12bit_max: ; GFX12-GISEL: ; %bb.0: @@ -3332,26 +2834,6 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(ptr addrspace(1 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_neg_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:-4096 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_neg_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_neg_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3437,29 +2919,17 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(ptr addrspace(1 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_neg_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 
v1, null, -1, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_neg_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: global_inst_salu_offset_neg_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_neg_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -3507,15 +2977,15 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(ptr addrspace(1) ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_2x_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: 
global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_2x_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_2x_11bit_max: ; GFX12-GISEL: ; %bb.0: @@ -3527,26 +2997,6 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(ptr addrspace(1) ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_2x_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3593,15 +3043,15 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(ptr addrspace(1) ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: 
global_inst_salu_offset_2x_12bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x1000 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_2x_12bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_2x_12bit_max: ; GFX12-GISEL: ; %bb.0: @@ -3613,26 +3063,6 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(ptr addrspace(1) ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0x1000 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_2x_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0x1000 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; 
GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3679,15 +3109,15 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(ptr addrspace(1) ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_2x_13bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x3000 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_2x_13bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0x3000 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_2x_13bit_max: ; GFX12-GISEL: ; %bb.0: @@ -3699,26 +3129,6 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(ptr addrspace(1) ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3000 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_2x_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3000 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3768,15 +3178,15 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(ptr addrspac ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off ; GFX10-GISEL-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_2x_neg_11bit_max: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_2x_neg_11bit_max: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_2x_neg_11bit_max: ; GFX12-GISEL: ; %bb.0: @@ -3799,26 +3209,6 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(ptr addrspac ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_neg_11bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] offset:-4096 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_2x_neg_11bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_neg_11bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -3904,29 +3294,17 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(ptr addrspac ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_neg_12bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_2x_neg_12bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: global_inst_salu_offset_2x_neg_12bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_neg_12bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4013,29 +3391,17 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(ptr addrspac ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_neg_13bit_max: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_2x_neg_13bit_max: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 -; 
GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: global_inst_salu_offset_2x_neg_13bit_max: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_2x_neg_13bit_max: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4126,29 +3492,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(ptr addrsp ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_11bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:2047 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_11bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:2047 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_11bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4243,29 +3597,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(ptr addrsp ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_11bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:2048 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_11bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; 
GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:2048 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_11bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4360,29 +3702,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(ptr addrsp ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_12bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_12bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; 
GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_12bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4477,29 +3807,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(ptr addrsp ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_12bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: 
global_inst_salu_offset_64bit_12bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_12bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4594,29 +3912,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(ptr addrsp ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_13bit_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off offset:4095 glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: 
s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_13bit_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_13bit_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4711,29 +4017,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(ptr addrsp ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SDAG-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_13bit_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: 
global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_13bit_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 +; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-SDAG-NEXT: s_endpgm ; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_13bit_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: @@ -4790,17 +4084,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(p ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff -; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; 
GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, 0x7ff +; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: ; GFX12-GISEL: ; %bb.0: @@ -4814,30 +4108,6 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(p ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_add_u32 s0, s0, 0x7ff -; GFX11-SDAG-TRUE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_add_u32 s0, s0, 0x7ff -; GFX11-SDAG-FAKE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: ; 
GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -4895,17 +4165,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(p ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x800 -; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, 0x800 +; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: ; GFX12-GISEL: ; %bb.0: @@ -4919,30 +4189,6 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(p ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_add_u32 s0, s0, 0x800 -; GFX11-SDAG-TRUE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt 
vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_add_u32 s0, s0, 0x800 -; GFX11-SDAG-FAKE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -5000,17 +4246,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(p ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfff -; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, 0xfff +; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; 
; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: ; GFX12-GISEL: ; %bb.0: @@ -5024,30 +4270,6 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(p ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_add_u32 s0, s0, 0xfff -; GFX11-SDAG-TRUE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_add_u32 s0, s0, 0xfff -; GFX11-SDAG-FAKE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -5105,17 +4327,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(p ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; 
GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1000 -; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, 0x1000 +; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: ; GFX12-GISEL: ; %bb.0: @@ -5129,30 +4351,6 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(p ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_add_u32 s0, s0, 0x1000 -; GFX11-SDAG-TRUE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_add_u32 s0, s0, 0x1000 -; GFX11-SDAG-FAKE16-NEXT: s_addc_u32 
s1, s1, 0x80000000 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -5210,17 +4408,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(p ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff -; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, 0x1fff +; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: ; GFX12-GISEL: ; %bb.0: @@ -5234,30 +4432,6 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(p ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; 
GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_add_u32 s0, s0, 0x1fff -; GFX11-SDAG-TRUE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_add_u32 s0, s0, 0x1fff -; GFX11-SDAG-FAKE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -5315,17 +4489,17 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(p ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_endpgm ; -; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x2000 -; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-GISEL-NEXT: s_endpgm +; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; 
GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, 0x2000 +; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_endpgm ; ; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: ; GFX12-GISEL: ; %bb.0: @@ -5339,30 +4513,6 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(p ; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off ; GFX12-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_add_u32 s0, s0, 0x2000 -; GFX11-SDAG-TRUE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-TRUE16-NEXT: s_endpgm -; -; GFX11-SDAG-FAKE16-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_add_u32 s0, s0, 0x2000 -; GFX11-SDAG-FAKE16-NEXT: s_addc_u32 s1, s1, 0x80000000 -; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[0:1], v0, off -; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX12-SDAG-TRUE16-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: ; GFX12-SDAG-TRUE16: ; %bb.0: ; GFX12-SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -5394,10 +4544,10 @@ define amdgpu_kernel void 
@global_inst_salu_offset_64bit_13bit_neg_high_split1(p ret void } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX11: {{.*}} ; GFX11-GISEL-FAKE16: {{.*}} ; GFX11-GISEL-TRUE16: {{.*}} -; GFX11-SDAG: {{.*}} +; GFX11-SDAG-FAKE16: {{.*}} +; GFX11-SDAG-TRUE16: {{.*}} ; GFX12: {{.*}} ; GFX12-GISEL-FAKE16: {{.*}} ; GFX12-GISEL-TRUE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index b1e05158b6212..1419529644cfd 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -2641,7 +2641,7 @@ define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) { ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 ; GFX11-TRUE16-NEXT: s_mov_b32 s0, 0 -; GFX11-TRUE16-NEXT: flat_load_d16_u8 v0, v[0:1] +; GFX11-TRUE16-NEXT: flat_load_u8 v0, v[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0.l ; GFX11-TRUE16-NEXT: .LBB8_1: ; %branch diff --git a/llvm/test/CodeGen/AMDGPU/rotl.ll b/llvm/test/CodeGen/AMDGPU/rotl.ll index 25020673bce22..638cd0d0c5181 100644 --- a/llvm/test/CodeGen/AMDGPU/rotl.ll +++ b/llvm/test/CodeGen/AMDGPU/rotl.ll @@ -374,15 +374,15 @@ define void @test_rotl_i16(ptr addrspace(1) nocapture readonly %sourceA, ptr add ; GFX11-TRUE16-LABEL: test_rotl_i16: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v2, v[2:3], off offset:48 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off offset:32 +; GFX11-TRUE16-NEXT: global_load_u16 v2, v[2:3], off offset:48 +; GFX11-TRUE16-NEXT: global_load_u16 v1, v[0:1], off offset:32 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.h, 0, v2.l +; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, 0, 
v2.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, v2.l, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, v2.l, v1.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b16 v0.l, v0.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v1.l, v0.l +; GFX11-TRUE16-NEXT: v_lshrrev_b16 v0.l, v0.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.h, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v[4:5], v0, off offset:8 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/rotr.ll b/llvm/test/CodeGen/AMDGPU/rotr.ll index 74ac181c120b5..042b9f7c85d45 100644 --- a/llvm/test/CodeGen/AMDGPU/rotr.ll +++ b/llvm/test/CodeGen/AMDGPU/rotr.ll @@ -331,15 +331,15 @@ define void @test_rotr_i16(ptr addrspace(1) nocapture readonly %sourceA, ptr add ; GFX11-TRUE16-LABEL: test_rotr_i16: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v2, v[2:3], off offset:48 -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off offset:32 +; GFX11-TRUE16-NEXT: global_load_u16 v2, v[2:3], off offset:48 +; GFX11-TRUE16-NEXT: global_load_u16 v1, v[0:1], off offset:32 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.h, 0, v2.l +; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, 0, v2.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshrrev_b16 v1.l, v2.l, v0.l +; GFX11-TRUE16-NEXT: v_lshrrev_b16 v0.h, v2.l, v1.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.l, v0.h, v0.l -; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v1.l, v0.l +; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.l, v0.l, v1.l +; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.h, v0.l ; GFX11-TRUE16-NEXT: global_store_b16 v[4:5], v0, off offset:8 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; diff --git 
a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll index 91c88ec5e718c..9a0a2ee16db1a 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -9,8 +9,9 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10,GFX10-GISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s +; FIXME-TRUE16 enable gisel +; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s +; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s ; Test that add/sub with a constant is swapped to sub/add with negated ; constant to minimize code size. 
@@ -128,19 +129,18 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrsp ; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-LABEL: v_test_i32_x_sub_64: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1 -; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11-SDAG-NEXT: s_endpgm -; +; GFX11-LABEL: v_test_i32_x_sub_64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 64, v1 +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_endpgm ; GFX11-GISEL-LABEL: v_test_i32_x_sub_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -324,25 +324,24 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out, ; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-LABEL: v_test_i32_x_sub_64_multi_use: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc -; GFX11-SDAG-NEXT: s_waitcnt 
vmcnt(0) -; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1 -; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v2, 64, v2 -; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] dlc -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: global_store_b32 v0, v2, s[0:1] dlc -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_endpgm -; +; GFX11-LABEL: v_test_i32_x_sub_64_multi_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 64, v1 +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 64, v2 +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v0, v2, s[0:1] dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_endpgm ; GFX11-GISEL-LABEL: v_test_i32_x_sub_64_multi_use: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -1340,7 +1339,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 ; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] @@ -1358,7 +1357,6 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp ; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 ; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] ; 
GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64: ; GFX11-GISEL-TRUE16: ; %bb.0: ; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -1371,7 +1369,6 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp ; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, 0xffc0, v0.l ; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-GISEL-TRUE16-NEXT: s_endpgm -; ; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64: ; GFX11-GISEL-FAKE16: ; %bb.0: ; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -1522,16 +1519,16 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out ; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: ; GFX11-SDAG-TRUE16: ; %bb.0: ; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 -; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[2:3] -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0 +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 -; GFX11-SDAG-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0 +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v1.l, v1.l, 64 +; GFX11-SDAG-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-SDAG-TRUE16-NEXT: s_endpgm ; ; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: @@ -1549,7 +1546,6 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 
0xffff, v1 ; GFX11-SDAG-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: ; GFX11-GISEL-TRUE16: ; %bb.0: ; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -1564,7 +1560,6 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out ; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, 0xffc0, v0.l ; GFX11-GISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-GISEL-TRUE16-NEXT: s_endpgm -; ; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: ; GFX11-GISEL-FAKE16: ; %bb.0: ; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -1759,12 +1754,12 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out, ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[2:3] glc dlc +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] glc dlc ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 -; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.h, v0.h, 64 +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.h, v2.l, 64 ; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] dlc @@ -1789,7 +1784,6 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out, ; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-FAKE16-NEXT: s_endpgm -; ; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_multi_use: ; GFX11-GISEL-TRUE16: ; %bb.0: ; 
GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -1808,7 +1802,6 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out, ; GFX11-GISEL-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] dlc ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-TRUE16-NEXT: s_endpgm -; ; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_multi_use: ; GFX11-GISEL-FAKE16: ; %bb.0: ; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -3648,19 +3641,18 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, p ; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: v_pk_add_u16 v1, 0xffffc400, v1 op_sel_hi:[0,1] -; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11-SDAG-NEXT: s_endpgm -; +; GFX11-LABEL: v_test_v2i16_x_add_neg_fpone: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_pk_add_u16 v1, 0xffffc400, v1 op_sel_hi:[0,1] +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_endpgm ; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -3813,19 +3805,18 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out ; GFX10-GISEL-NEXT: global_store_dword v0, v1, 
s[0:1] ; GFX10-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1] -; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11-SDAG-NEXT: s_endpgm -; +; GFX11-LABEL: v_test_v2i16_x_add_neg_negfpone: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1] +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_endpgm ; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -4203,19 +4194,18 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out, ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm ; -; GFX11-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0] -; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11-SDAG-NEXT: s_endpgm -; +; GFX11-LABEL: 
v_test_v2i16_x_add_undef_neg32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0] +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_endpgm ; GFX11-GISEL-TRUE16-LABEL: v_test_v2i16_x_add_undef_neg32: ; GFX11-GISEL-TRUE16: ; %bb.0: ; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -4229,7 +4219,6 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out, ; GFX11-GISEL-TRUE16-NEXT: v_pk_add_u16 v1, v1, s2 ; GFX11-GISEL-TRUE16-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-GISEL-TRUE16-NEXT: s_endpgm -; ; GFX11-GISEL-FAKE16-LABEL: v_test_v2i16_x_add_undef_neg32: ; GFX11-GISEL-FAKE16: ; %bb.0: ; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -4371,19 +4360,18 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out, ; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg32_undef: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 -; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11-SDAG-NEXT: s_endpgm -; +; GFX11-LABEL: v_test_v2i16_x_add_neg32_undef: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: 
v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 +; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11-NEXT: s_endpgm ; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg32_undef: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -4410,3 +4398,5 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/smed3.ll b/llvm/test/CodeGen/AMDGPU/smed3.ll index a9fb77904c641..3c774c521e3e2 100644 --- a/llvm/test/CodeGen/AMDGPU/smed3.ll +++ b/llvm/test/CodeGen/AMDGPU/smed3.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16,-d16-hw-bug < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s declare i32 @llvm.amdgcn.workitem.id.x() #0 diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.ll b/llvm/test/CodeGen/AMDGPU/spillv16.ll index 3d21860e2af40..9708a359360b3 100644 --- a/llvm/test/CodeGen/AMDGPU/spillv16.ll +++ b/llvm/test/CodeGen/AMDGPU/spillv16.ll @@ -6,7 +6,7 @@ define void @spill_i16_alu() { ; GCN-TRUE16-LABEL: spill_i16_alu: ; GCN-TRUE16: ; %bb.0: ; %entry ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; 
GCN-TRUE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill @@ -52,13 +52,13 @@ define void @spill_i16_alu_two_vals() { ; GCN-TRUE16-LABEL: spill_i16_alu_two_vals: ; GCN-TRUE16: ; %bb.0: ; %entry ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; GCN-TRUE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill ; GCN-TRUE16-NEXT: ;;#ASMSTART ; GCN-TRUE16-NEXT: ;;#ASMEND -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 glc dlc +; GCN-TRUE16-NEXT: scratch_load_u16 v0, off, s32 offset:4 glc dlc ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GCN-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 ; 2-byte Folded Reload ; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l @@ -113,33 +113,19 @@ entry: ; Tests after this do not actually test 16 bit spills because there is no use of VGPR_16. 
They could demonstrate 16 bit spills if we update the instructions to use VGPR_16 instead of VGPR_32 define void @spill_i16() { -; GCN-TRUE16-LABEL: spill_i16: -; GCN-TRUE16: ; %bb.0: ; %entry -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill -; GCN-TRUE16-NEXT: ;;#ASMSTART -; GCN-TRUE16-NEXT: ;;#ASMEND -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc -; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GCN-FAKE16-LABEL: spill_i16: -; GCN-FAKE16: ; %bb.0: ; %entry -; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc -; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill -; GCN-FAKE16-NEXT: ;;#ASMSTART -; GCN-FAKE16-NEXT: ;;#ASMEND -; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload -; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc -; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: spill_i16: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: scratch_load_u16 v0, off, s32 glc dlc +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-NEXT: s_setpc_b64 s[30:31] entry: %alloca = alloca i16, i32 1, align 4, addrspace(5) @@ 
-156,33 +142,19 @@ entry: } define void @spill_half() { -; GCN-TRUE16-LABEL: spill_half: -; GCN-TRUE16: ; %bb.0: ; %entry -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill -; GCN-TRUE16-NEXT: ;;#ASMSTART -; GCN-TRUE16-NEXT: ;;#ASMEND -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc -; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GCN-FAKE16-LABEL: spill_half: -; GCN-FAKE16: ; %bb.0: ; %entry -; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc -; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill -; GCN-FAKE16-NEXT: ;;#ASMSTART -; GCN-FAKE16-NEXT: ;;#ASMEND -; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload -; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc -; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: spill_half: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: scratch_load_u16 v0, off, s32 glc dlc +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-NEXT: s_setpc_b64 s[30:31] entry: %alloca = alloca half, i32 1, align 4, addrspace(5) @@ -199,33 +171,19 @@ entry: } define void @spill_i16_from_v2i16() { -; 
GCN-TRUE16-LABEL: spill_i16_from_v2i16: -; GCN-TRUE16: ; %bb.0: ; %entry -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill -; GCN-TRUE16-NEXT: ;;#ASMSTART -; GCN-TRUE16-NEXT: ;;#ASMEND -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc -; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GCN-FAKE16-LABEL: spill_i16_from_v2i16: -; GCN-FAKE16: ; %bb.0: ; %entry -; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc -; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill -; GCN-FAKE16-NEXT: ;;#ASMSTART -; GCN-FAKE16-NEXT: ;;#ASMEND -; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload -; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc -; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: spill_i16_from_v2i16: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc +; GCN-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-NEXT: s_setpc_b64 s[30:31] entry: %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5) @@ -245,19 +203,19 @@ define void 
@spill_2xi16_from_v2i16() { ; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16: ; GCN-TRUE16: ; %bb.0: ; %entry ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc +; GCN-TRUE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; GCN-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:12 ; 4-byte Folded Spill +; GCN-TRUE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill ; GCN-TRUE16-NEXT: ;;#ASMSTART ; GCN-TRUE16-NEXT: ;;#ASMEND -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:12 ; 4-byte Folded Reload ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 @@ -306,19 +264,17 @@ define void @spill_2xi16_from_v2i16_one_free_reg() { ; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg: ; GCN-TRUE16: ; %bb.0: ; %entry ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc +; GCN-TRUE16-NEXT: scratch_load_u16 v7, off, s32 offset:2 glc dlc ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill -; 
GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; GCN-TRUE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill ; GCN-TRUE16-NEXT: ;;#ASMSTART ; GCN-TRUE16-NEXT: ;;#ASMEND -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload -; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: v_mov_b16_e32 v0.l, v7.l ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/strict_fpext.ll b/llvm/test/CodeGen/AMDGPU/strict_fpext.ll index 40aac82888de2..aed966e5728dd 100644 --- a/llvm/test/CodeGen/AMDGPU/strict_fpext.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fpext.ll @@ -506,7 +506,7 @@ define float @v_constrained_fpext_f16_to_f32_noabi(ptr addrspace(1) %ptr) #0 { ; GFX11-TRUE16-LABEL: v_constrained_fpext_f16_to_f32_noabi: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off +; GFX11-TRUE16-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll index e1574dcd45462..93ffa6613b363 100644 --- a/llvm/test/CodeGen/AMDGPU/uaddo.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll @@ -739,7 +739,7 @@ define amdgpu_kernel void @v_uaddo_i16(ptr addrspace(1) %out, ptr addrspace(1) % ; 
GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: global_load_d16_b16 v1, v0, s[4:5] +; GFX11-NEXT: global_load_u16 v1, v0, s[4:5] ; GFX11-NEXT: global_load_u16 v2, v0, s[6:7] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_add_nc_u32_e32 v2, v1, v2 diff --git a/llvm/test/CodeGen/AMDGPU/umed3.ll b/llvm/test/CodeGen/AMDGPU/umed3.ll index 9d8a45ada87aa..9b33b25b33846 100644 --- a/llvm/test/CodeGen/AMDGPU/umed3.ll +++ b/llvm/test/CodeGen/AMDGPU/umed3.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefix=GFX11-FAKE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefix=GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16,-d16-hw-bug < %s | FileCheck -check-prefix=GFX11-TRUE16 %s declare i32 @llvm.amdgcn.workitem.id.x() #0 diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll index 0289dab4588a2..ba441a0029b51 100644 --- a/llvm/test/CodeGen/AMDGPU/usubo.ll +++ b/llvm/test/CodeGen/AMDGPU/usubo.ll @@ -738,7 +738,7 @@ define amdgpu_kernel void @v_usubo_i16(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: global_load_d16_b16 v1, v0, s[4:5] +; GFX11-NEXT: global_load_u16 v1, v0, s[4:5] ; GFX11-NEXT: global_load_u16 v2, v0, s[6:7] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_sub_nc_u32_e32 v2, v1, v2 diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll index 53525c93f5b89..ad7fb12e8ea7e 100644 --- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll 
+++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll @@ -2232,11 +2232,10 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f16(ptr addrspace(1) %out, i32 %c, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 @@ -2244,10 +2243,11 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f16(ptr addrspace(1) %out, i32 %c, ; GFX11-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 ; GFX11-TRUE16-NEXT: s_cselect_b64 s[2:3], -1, 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0x7fff, v0.l -; GFX11-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, s[2:3] +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v1.l +; GFX11-TRUE16-NEXT: v_xor_b16 v0.h, 0x8000, v1.l +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.h, v0.l, s[2:3] ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/v_pack.ll b/llvm/test/CodeGen/AMDGPU/v_pack.ll index d8044139aceb3..bad7ea716ca48 100644 --- a/llvm/test/CodeGen/AMDGPU/v_pack.ll +++ b/llvm/test/CodeGen/AMDGPU/v_pack.ll @@ -4,7 +4,8 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GCN-FAKE16 %s ; RUN: llc -global-isel 
-amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GCN-REAL16 %s -; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-REAL16 %s +; FIXME-TRUE16 enable gisel +; XUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-REAL16 %s declare i32 @llvm.amdgcn.workitem.id.x() #1 @@ -88,21 +89,20 @@ define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace ; GFX11-GCN-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-GCN-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-GCN-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GCN-REAL16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-GCN-REAL16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-GCN-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GCN-REAL16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-GCN-REAL16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-GCN-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GCN-REAL16-NEXT: global_load_d16_hi_b16 v0, v1, s[2:3] glc dlc +; GFX11-GCN-REAL16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc ; GFX11-GCN-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l -; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h +; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v1.l +; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v2.l ; GFX11-GCN-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GCN-REAL16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h ; GFX11-GCN-REAL16-NEXT: ;;#ASMSTART ; 
GFX11-GCN-REAL16-NEXT: ; use v0 ; GFX11-GCN-REAL16-NEXT: ;;#ASMEND ; GFX11-GCN-REAL16-NEXT: s_endpgm -; ; GFX11-GISEL-REAL16-LABEL: v_pack_b32_v2f16: ; GFX11-GISEL-REAL16: ; %bb.0: ; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -217,21 +217,20 @@ define amdgpu_kernel void @v_pack_b32_v2f16_sub(ptr addrspace(1) %in0, ptr addrs ; GFX11-GCN-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-GCN-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-GCN-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GCN-REAL16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-GCN-REAL16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-GCN-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GCN-REAL16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-GCN-REAL16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-GCN-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GCN-REAL16-NEXT: global_load_d16_hi_b16 v0, v1, s[2:3] glc dlc +; GFX11-GCN-REAL16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc ; GFX11-GCN-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GCN-REAL16-NEXT: v_subrev_f16_e32 v0.l, 2.0, v0.l -; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h +; GFX11-GCN-REAL16-NEXT: v_subrev_f16_e32 v0.l, 2.0, v1.l +; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v2.l ; GFX11-GCN-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GCN-REAL16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h ; GFX11-GCN-REAL16-NEXT: ;;#ASMSTART ; GFX11-GCN-REAL16-NEXT: ; use v0 ; GFX11-GCN-REAL16-NEXT: ;;#ASMEND ; GFX11-GCN-REAL16-NEXT: s_endpgm -; ; GFX11-GISEL-REAL16-LABEL: v_pack_b32_v2f16_sub: ; GFX11-GISEL-REAL16: ; %bb.0: ; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -357,7 +356,6 @@ define amdgpu_kernel void @fptrunc( ; GFX11-GCN-REAL16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l ; GFX11-GCN-REAL16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 ; GFX11-GCN-REAL16-NEXT: s_endpgm -; ; GFX11-GISEL-REAL16-LABEL: fptrunc: ; GFX11-GISEL-REAL16: ; %bb.0: ; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -460,21 
+458,20 @@ define amdgpu_kernel void @v_pack_b32.fabs(ptr addrspace(1) %in0, ptr addrspace( ; GFX11-GCN-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-GCN-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-GCN-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GCN-REAL16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-GCN-REAL16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-GCN-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GCN-REAL16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-GCN-REAL16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-GCN-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GCN-REAL16-NEXT: global_load_d16_hi_b16 v0, v1, s[2:3] glc dlc +; GFX11-GCN-REAL16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc ; GFX11-GCN-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l -; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h +; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v1.l +; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v2.l ; GFX11-GCN-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GCN-REAL16-NEXT: v_pack_b32_f16 v0, |v0.l|, |v0.h| ; GFX11-GCN-REAL16-NEXT: ;;#ASMSTART ; GFX11-GCN-REAL16-NEXT: ; use v0 ; GFX11-GCN-REAL16-NEXT: ;;#ASMEND ; GFX11-GCN-REAL16-NEXT: s_endpgm -; ; GFX11-GISEL-REAL16-LABEL: v_pack_b32.fabs: ; GFX11-GISEL-REAL16: ; %bb.0: ; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -591,21 +588,20 @@ define amdgpu_kernel void @v_pack_b32.fneg(ptr addrspace(1) %in0, ptr addrspace( ; GFX11-GCN-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-GCN-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-GCN-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GCN-REAL16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-GCN-REAL16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-GCN-REAL16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GCN-REAL16-NEXT: global_load_d16_b16 v0, v1, s[0:1] glc dlc +; GFX11-GCN-REAL16-NEXT: global_load_u16 v1, v0, s[0:1] glc dlc ; GFX11-GCN-REAL16-NEXT: s_waitcnt vmcnt(0) -; 
GFX11-GCN-REAL16-NEXT: global_load_d16_hi_b16 v0, v1, s[2:3] glc dlc +; GFX11-GCN-REAL16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc ; GFX11-GCN-REAL16-NEXT: s_waitcnt vmcnt(0) -; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l -; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h +; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v1.l +; GFX11-GCN-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v2.l ; GFX11-GCN-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GCN-REAL16-NEXT: v_pack_b32_f16 v0, -v0.l, -v0.h ; GFX11-GCN-REAL16-NEXT: ;;#ASMSTART ; GFX11-GCN-REAL16-NEXT: ; use v0 ; GFX11-GCN-REAL16-NEXT: ;;#ASMEND ; GFX11-GCN-REAL16-NEXT: s_endpgm -; ; GFX11-GISEL-REAL16-LABEL: v_pack_b32.fneg: ; GFX11-GISEL-REAL16: ; %bb.0: ; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 diff --git a/llvm/test/CodeGen/AMDGPU/vector_rebroadcast.ll b/llvm/test/CodeGen/AMDGPU/vector_rebroadcast.ll index 07e9325095017..dcf8900d6dce3 100644 --- a/llvm/test/CodeGen/AMDGPU/vector_rebroadcast.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_rebroadcast.ll @@ -26,7 +26,7 @@ define <2 x i8> @shuffle_v2i8_rebroadcast(ptr addrspace(1) %arg0) { ; GFX11-TRUE16-LABEL: shuffle_v2i8_rebroadcast: ; GFX11-TRUE16: ; %bb.0: ; %entry ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off +; GFX11-TRUE16-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b16 v1.l, 8, v0.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll index b01e92d6979a3..ecc3c8b2165ec 100644 --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -3027,23 +3027,14 @@ define void @shuffle_v4i8_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ; GFX10-NEXT: global_store_dword v[4:5], v0, off ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX11-TRUE16-LABEL: shuffle_v4i8_concat: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[2:3], off -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: global_store_b32 v[4:5], v0, off -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: shuffle_v4i8_concat: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-FAKE16-NEXT: global_load_d16_hi_b16 v0, v[2:3], off -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: global_store_b32 v[4:5], v0, off -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: shuffle_v4i8_concat: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: global_load_d16_hi_b16 v0, v[2:3], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[4:5], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <2 x i8>, ptr addrspace(1) %arg0 %val1 = load <2 x i8>, ptr addrspace(1) %arg1 %shuffle = shufflevector <2 x i8> %val0, <2 x i8> %val1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll index 04a5cac116d78..0e662850478be 100644 --- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll +++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll @@ -398,7 +398,7 @@ define amdgpu_kernel void @no_widen_i16_constant_divergent_load(ptr addrspace(4) ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v0, s[0:1] +; GFX11-TRUE16-NEXT: global_load_u16 v0, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x3e7, 
v0.l ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, 4