-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[AMDGPU] Limit allocation of lo128 registers for occupancy #173100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/rampitec/lo128-regs-from-all-banks
Are you sure you want to change the base?
[AMDGPU] Limit allocation of lo128 registers for occupancy #173100
Conversation
This stack of pull requests is managed by sgh. |
|
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) ChangesParent change allows allocation of lo128 VGPRs from all 4 banks. Limit the available allocation order to the occupancy. Both hard Patch is 32.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/173100.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 838d31df5bacd..b727c4680ad5b 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -639,8 +639,20 @@ def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg1
let AltOrders = [(add (sequence "VGPR%u", 0, 127),
(sequence "VGPR%u", 256, 383),
(sequence "VGPR%u", 512, 639),
- (sequence "VGPR%u", 768, 895))];
- let AltOrderSelect = [{ return 1; }];
+ (sequence "VGPR%u", 768, 895)),
+ (add (sequence "VGPR%u", 0, 127),
+ (sequence "VGPR%u", 256, 383),
+ (sequence "VGPR%u", 512, 639)),
+ (add (sequence "VGPR%u", 0, 127),
+ (sequence "VGPR%u", 256, 383)),
+ (add (sequence "VGPR%u", 0, 127))];
+ let AltOrderSelect = [{
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ unsigned N = ST.getMaxNumVGPRs(MF);
+ unsigned SchedWaves = MF.getInfo<SIMachineFunctionInfo>()->getMinAllowedOccupancy();
+ N = std::min(N, ST.getMaxNumVGPRs(SchedWaves, 0));
+ return N > 768 ? 1 : N > 512 ? 2 : N > 256 ? 3 : 4;
+ }];
let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
let GeneratePressureSet = 0;
let Size = 32;
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-vgpr_lo128-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/regalloc-vgpr_lo128-gfx1250.mir
index ba0bfadb248c1..2f199409a8dc1 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-vgpr_lo128-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-vgpr_lo128-gfx1250.mir
@@ -6,8 +6,14 @@
define amdgpu_kernel void @rcp_f16_above_256_vregs() #0 { ret void }
define amdgpu_kernel void @rcp_f16_above_512_vregs() #0 { ret void }
define amdgpu_kernel void @rcp_f16_above_768_vregs() #0 { ret void }
-
- attributes #0 = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-waves-per-eu"="1" }
+ define amdgpu_kernel void @rcp_f16_above_128_vregs_occ4() #2 { ret void }
+ define amdgpu_kernel void @rcp_f16_above_256_vregs_occ2() #1 { ret void }
+ define amdgpu_kernel void @rcp_f16_above_128_vregs_sched_occ4() #0 { ret void }
+ define amdgpu_kernel void @rcp_f16_above_256_vregs_sched_occ2() #0 { ret void }
+
+ attributes #0 = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-waves-per-eu"="1,1" }
+ attributes #1 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2" }
+ attributes #2 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4,4" }
...
# GCN-LABEL: name: rcp_f16_above_128_vregs
@@ -21,6 +27,8 @@
---
name: rcp_f16_above_128_vregs
tracksRegLiveness: true
+machineFunctionInfo:
+ occupancy: 1
body: |
bb.0:
; Occupy all low 128 VGPRs:
@@ -43,6 +51,8 @@ body: |
---
name: rcp_f16_above_256_vregs
tracksRegLiveness: true
+machineFunctionInfo:
+ occupancy: 1
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383
@@ -64,6 +74,8 @@ body: |
---
name: rcp_f16_above_512_vregs
tracksRegLiveness: true
+machineFunctionInfo:
+ occupancy: 1
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383, $vgpr512_vgpr513_vgpr514_vgpr515_vgpr516_vgpr517_vgpr518_vgpr519_vgpr520_vgpr521_vgpr522_vgpr523_vgpr524_vgpr525_vgpr526_vgpr527, $vgpr528_vgpr529_vgpr530_vgpr531_vgpr532_vgpr533_vgpr534_vgpr535_vgpr536_vgpr537_vgpr538_vgpr539_vgpr540_vgpr541_vgpr542_vgpr543, $vgpr544_vgpr545_vgpr546_vgpr547_vgpr548_vgpr549_vgpr550_vgpr551_vgpr552_vgpr553_vgpr554_vgpr555_vgpr556_vgpr557_vgpr558_vgpr559, $vgpr560_vgpr561_vgpr562_vgpr563_vgpr564_vgpr565_vgpr566_vgpr567_vgpr568_vgpr569_vgpr570_vgpr571_vgpr572_vgpr573_vgpr574_vgpr575, $vgpr576_vgpr577_vgpr578_vgpr579_vgpr580_vgpr581_vgpr582_vgpr583_vgpr584_vgpr585_vgpr586_vgpr587_vgpr588_vgpr589_vgpr590_vgpr591, $vgpr592_vgpr593_vgpr594_vgpr595_vgpr596_vgpr597_vgpr598_vgpr599_vgpr600_vgpr601_vgpr602_vgpr603_vgpr604_vgpr605_vgpr606_vgpr607, $vgpr608_vgpr609_vgpr610_vgpr611_vgpr612_vgpr613_vgpr614_vgpr615_vgpr616_vgpr617_vgpr618_vgpr619_vgpr620_vgpr621_vgpr622_vgpr623, $vgpr624_vgpr625_vgpr626_vgpr627_vgpr628_vgpr629_vgpr630_vgpr631_vgpr632_vgpr633_vgpr634_vgpr635_vgpr636_vgpr637_vgpr638_vgpr639
@@ -83,6 +95,8 @@ body: |
---
name: rcp_f16_above_768_vregs
tracksRegLiveness: true
+machineFunctionInfo:
+ occupancy: 1
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383, $vgpr512_vgpr513_vgpr514_vgpr515_vgpr516_vgpr517_vgpr518_vgpr519_vgpr520_vgpr521_vgpr522_vgpr523_vgpr524_vgpr525_vgpr526_vgpr527, $vgpr528_vgpr529_vgpr530_vgpr531_vgpr532_vgpr533_vgpr534_vgpr535_vgpr536_vgpr537_vgpr538_vgpr539_vgpr540_vgpr541_vgpr542_vgpr543, $vgpr544_vgpr545_vgpr546_vgpr547_vgpr548_vgpr549_vgpr550_vgpr551_vgpr552_vgpr553_vgpr554_vgpr555_vgpr556_vgpr557_vgpr558_vgpr559, $vgpr560_vgpr561_vgpr562_vgpr563_vgpr564_vgpr565_vgpr566_vgpr567_vgpr568_vgpr569_vgpr570_vgpr571_vgpr572_vgpr573_vgpr574_vgpr575, $vgpr576_vgpr577_vgpr578_vgpr579_vgpr580_vgpr581_vgpr582_vgpr583_vgpr584_vgpr585_vgpr586_vgpr587_vgpr588_vgpr589_vgpr590_vgpr591, $vgpr592_vgpr593_vgpr594_vgpr595_vgpr596_vgpr597_vgpr598_vgpr599_vgpr600_vgpr601_vgpr602_vgpr603_vgpr604_vgpr605_vgpr606_vgpr607, $vgpr608_vgpr609_vgpr610_vgpr611_vgpr612_vgpr613_vgpr614_vgpr615_vgpr616_vgpr617_vgpr618_vgpr619_vgpr620_vgpr621_vgpr622_vgpr623, $vgpr624_vgpr625_vgpr626_vgpr627_vgpr628_vgpr629_vgpr630_vgpr631_vgpr632_vgpr633_vgpr634_vgpr635_vgpr636_vgpr637_vgpr638_vgpr639, $vgpr768_vgpr769_vgpr770_vgpr771_vgpr772_vgpr773_vgpr774_vgpr775_vgpr776_vgpr777_vgpr778_vgpr779_vgpr780_vgpr781_vgpr782_vgpr783, $vgpr784_vgpr785_vgpr786_vgpr787_vgpr788_vgpr789_vgpr790_vgpr791_vgpr792_vgpr793_vgpr794_vgpr795_vgpr796_vgpr797_vgpr798_vgpr799, $vgpr800_vgpr801_vgpr802_vgpr803_vgpr804_vgpr805_vgpr806_vgpr807_vgpr808_vgpr809_vgpr810_vgpr811_vgpr812_vgpr813_vgpr814_vgpr815, $vgpr816_vgpr817_vgpr818_vgpr819_vgpr820_vgpr821_vgpr822_vgpr823_vgpr824_vgpr825_vgpr826_vgpr827_vgpr828_vgpr829_vgpr830_vgpr831, $vgpr832_vgpr833_vgpr834_vgpr835_vgpr836_vgpr837_vgpr838_vgpr839_vgpr840_vgpr841_vgpr842_vgpr843_vgpr844_vgpr845_vgpr846_vgpr847, $vgpr848_vgpr849_vgpr850_vgpr851_vgpr852_vgpr853_vgpr854_vgpr855_vgpr856_vgpr857_vgpr858_vgpr859_vgpr860_vgpr861_vgpr862_vgpr863, $vgpr864_vgpr865_vgpr866_vgpr867_vgpr868_vgpr869_vgpr870_vgpr871_vgpr872_vgpr873_vgpr874_vgpr875_vgpr876_vgpr877_vgpr878_vgpr879, $vgpr880_vgpr881_vgpr882_vgpr883_vgpr884_vgpr885_vgpr886_vgpr887_vgpr888_vgpr889_vgpr890_vgpr891_vgpr892_vgpr893_vgpr894_vgpr895
@@ -92,3 +106,85 @@ body: |
S_ENDPGM 0, implicit %1, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263_vgpr264_vgpr265_vgpr266_vgpr267_vgpr268_vgpr269_vgpr270_vgpr271, implicit $vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277_vgpr278_vgpr279_vgpr280_vgpr281_vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287, implicit $vgpr288_vgpr289_vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297_vgpr298_vgpr299_vgpr300_vgpr301_vgpr302_vgpr303, implicit $vgpr304_vgpr305_vgpr306_vgpr307_vgpr308_vgpr309_vgpr310_vgpr311_vgpr312_vgpr313_vgpr314_vgpr315_vgpr316_vgpr317_vgpr318_vgpr319, implicit $vgpr320_vgpr321_vgpr322_vgpr323_vgpr324_vgpr325_vgpr326_vgpr327_vgpr328_vgpr329_vgpr330_vgpr331_vgpr332_vgpr333_vgpr334_vgpr335, implicit $vgpr336_vgpr337_vgpr338_vgpr339_vgpr340_vgpr341_vgpr342_vgpr343_vgpr344_vgpr345_vgpr346_vgpr347_vgpr348_vgpr349_vgpr350_vgpr351, implicit $vgpr352_vgpr353_vgpr354_vgpr355_vgpr356_vgpr357_vgpr358_vgpr359_vgpr360_vgpr361_vgpr362_vgpr363_vgpr364_vgpr365_vgpr366_vgpr367, implicit $vgpr368_vgpr369_vgpr370_vgpr371_vgpr372_vgpr373_vgpr374_vgpr375_vgpr376_vgpr377_vgpr378_vgpr379_vgpr380_vgpr381_vgpr382_vgpr383, implicit $vgpr512_vgpr513_vgpr514_vgpr515_vgpr516_vgpr517_vgpr518_vgpr519_vgpr520_vgpr521_vgpr522_vgpr523_vgpr524_vgpr525_vgpr526_vgpr527, implicit $vgpr528_vgpr529_vgpr530_vgpr531_vgpr532_vgpr533_vgpr534_vgpr535_vgpr536_vgpr537_vgpr538_vgpr539_vgpr540_vgpr541_vgpr542_vgpr543, implicit $vgpr544_vgpr545_vgpr546_vgpr547_vgpr548_vgpr549_vgpr550_vgpr551_vgpr552_vgpr553_vgpr554_vgpr555_vgpr556_vgpr557_vgpr558_vgpr559, implicit $vgpr560_vgpr561_vgpr562_vgpr563_vgpr564_vgpr565_vgpr566_vgpr567_vgpr568_vgpr569_vgpr570_vgpr571_vgpr572_vgpr573_vgpr574_vgpr575, implicit $vgpr576_vgpr577_vgpr578_vgpr579_vgpr580_vgpr581_vgpr582_vgpr583_vgpr584_vgpr585_vgpr586_vgpr587_vgpr588_vgpr589_vgpr590_vgpr591, implicit $vgpr592_vgpr593_vgpr594_vgpr595_vgpr596_vgpr597_vgpr598_vgpr599_vgpr600_vgpr601_vgpr602_vgpr603_vgpr604_vgpr605_vgpr606_vgpr607, implicit $vgpr608_vgpr609_vgpr610_vgpr611_vgpr612_vgpr613_vgpr614_vgpr615_vgpr616_vgpr617_vgpr618_vgpr619_vgpr620_vgpr621_vgpr622_vgpr623, implicit $vgpr624_vgpr625_vgpr626_vgpr627_vgpr628_vgpr629_vgpr630_vgpr631_vgpr632_vgpr633_vgpr634_vgpr635_vgpr636_vgpr637_vgpr638_vgpr639, implicit $vgpr768_vgpr769_vgpr770_vgpr771_vgpr772_vgpr773_vgpr774_vgpr775_vgpr776_vgpr777_vgpr778_vgpr779_vgpr780_vgpr781_vgpr782_vgpr783, implicit $vgpr784_vgpr785_vgpr786_vgpr787_vgpr788_vgpr789_vgpr790_vgpr791_vgpr792_vgpr793_vgpr794_vgpr795_vgpr796_vgpr797_vgpr798_vgpr799, implicit $vgpr800_vgpr801_vgpr802_vgpr803_vgpr804_vgpr805_vgpr806_vgpr807_vgpr808_vgpr809_vgpr810_vgpr811_vgpr812_vgpr813_vgpr814_vgpr815, implicit $vgpr816_vgpr817_vgpr818_vgpr819_vgpr820_vgpr821_vgpr822_vgpr823_vgpr824_vgpr825_vgpr826_vgpr827_vgpr828_vgpr829_vgpr830_vgpr831, implicit $vgpr832_vgpr833_vgpr834_vgpr835_vgpr836_vgpr837_vgpr838_vgpr839_vgpr840_vgpr841_vgpr842_vgpr843_vgpr844_vgpr845_vgpr846_vgpr847, implicit $vgpr848_vgpr849_vgpr850_vgpr851_vgpr852_vgpr853_vgpr854_vgpr855_vgpr856_vgpr857_vgpr858_vgpr859_vgpr860_vgpr861_vgpr862_vgpr863, implicit $vgpr864_vgpr865_vgpr866_vgpr867_vgpr868_vgpr869_vgpr870_vgpr871_vgpr872_vgpr873_vgpr874_vgpr875_vgpr876_vgpr877_vgpr878_vgpr879, implicit $vgpr880_vgpr881_vgpr882_vgpr883_vgpr884_vgpr885_vgpr886_vgpr887_vgpr888_vgpr889_vgpr890_vgpr891_vgpr892_vgpr893_vgpr894_vgpr895
...
+
+# GCN-LABEL: name: rcp_f16_above_128_vregs_occ4
+
+# VGPR budget is 256 registers, do not use v256.
+
+# GCN: $vgpr0 = IMPLICIT_DEF
+# GCN: $vgpr0 = V_RCP_F16_fake16_e32 undef $vgpr0
+---
+name: rcp_f16_above_128_vregs_occ4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; Occupy all low 128 VGPRs:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127
+
+ %0:vgpr_32_lo128 = IMPLICIT_DEF
+ %1:vgpr_32_lo128 = V_RCP_F16_fake16_e32 killed %0, implicit $mode, implicit $exec
+
+ S_ENDPGM 0, implicit %1, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127
+...
+
+# GCN-LABEL: name: rcp_f16_above_256_vregs_occ2
+
+# VGPR budget is 512 registers, do not use v512.
+
+# GCN: $vgpr0 = IMPLICIT_DEF
+# GCN: $vgpr0 = V_RCP_F16_fake16_e32 undef $vgpr0
+---
+name: rcp_f16_above_256_vregs_occ2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr9...
[truncated]
|
shiltian
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good to me on top of the changes in the parent PR.
Parent change allows allocation of lo128 VGPRs from all 4 banks. That may result in the undesired allocation leaving a hole of maximum 128 registers in case if for example v0-v127 are allocated, and v128-v255 are free. Limit the available allocation order to the occupancy. Both hard occupancy limits and occupancy achieved during scheduling are considered. That is better to spill a register than to drop occupancy in this case.
6e916e3 to
1e82d04
Compare
d7bd9f6 to
534b6fe
Compare
Parent change allows allocation of lo128 VGPRs from all 4 banks.
That may result in the undesired allocation leaving a hole of
maximum 128 registers in case if for example v0-v127 are allocated,
and v128-v255 are free.
Limit the available allocation order to the occupancy. Both hard
occupancy limits and occupancy achieved during scheduling are
considered. That is better to spill a register than to drop occupancy
in this case.