Skip to content

Commit

Permalink
PM: Fix DB0 Hang
Browse files Browse the repository at this point in the history
        Key_Cronus_Test=PM_REGRESS

Change-Id: I706ec7b87e777b736153d5765ced0a3f6cea5d96
CQ: SW470688
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/81266
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: PPE CI <ppe-ci+hostboot@us.ibm.com>
Tested-by: Cronus HW CI <cronushw-ci+hostboot@us.ibm.com>
Tested-by: Hostboot CI <hostboot-ci+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: HWSV CI <hwsv-ci+hostboot@us.ibm.com>
Reviewed-by: YUE DU <daviddu@us.ibm.com>
Reviewed-by: RANGANATHPRASAD G. BRAHMASAMUDRA <prasadbgr@in.ibm.com>
Reviewed-by: Jennifer A Stofer <stofer@us.ibm.com>
  • Loading branch information
rbatraAustinIBM authored and op-jenkins committed Sep 5, 2019
1 parent 8df77fd commit 1096b33
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 13 deletions.
1 change: 1 addition & 0 deletions import/chips/p9/procedures/hwp/lib/p9_pm_hcd_flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ enum PM_CME_FLAGS_DEFS
CME_FLAGS_DROOP_SUSPEND_ENTRY = 14,
CME_FLAGS_SAFE_MODE = 16,
CME_FLAGS_PSTATES_SUSPENDED = 17,
CME_FLAGS_DB0_COMM_RECV_STARVATION_CNT_ENABLED = 18,
CME_FLAGS_SPWU_CHECK_ENABLE = 22,
CME_FLAGS_BLOCK_ENTRY_STOP11 = 23,
CME_FLAGS_PSTATES_ENABLED = 24,
Expand Down
81 changes: 75 additions & 6 deletions import/chips/p9/procedures/ppe/iota/iota_uih.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2017 */
/* COPYRIGHT 2017,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand All @@ -32,6 +32,9 @@ int g_eimr_stack_ctr = -1;
uint64_t g_eimr_override_stack[IOTA_NUM_EXT_IRQ_PRIORITIES];
uint64_t g_eimr_override = 0x0000000000000000;
uint64_t g_ext_irq_vector = 0;
// Starvation counters, one per interrupt that can be starved by higher
// priority work. Each counts the FIT ticks on which the interrupt was seen
// pending: incremented by
// p9_cme_pstate_db0_comm_recv_intercme_in0_pending_counter(), compared
// against the matching *_FIT_TICK_THRESHOLD in iota_uih(), and cleared to 0
// by the code that services the interrupt (p9_cme_pstate_process_db0,
// p9_cme_pstate_intercme_msg_handler, p9_cme_pstate_intercme_in0_irq_handler).
uint32_t g_db0_pending_fit_tick_count = 0;
uint32_t g_comm_recv_pending_fit_tick_count = 0;
uint32_t g_intercme_in0_pending_tick_count = 0;

// Unified IRQ priority and masking handler.
// - Locates the highest priority IRQ task vector that has at least one of its
Expand All @@ -51,13 +54,50 @@ uint32_t iota_uih(void)

do
{
if(ext_irq_vectors_cme[iPrtyLvl][IDX_PRTY_VEC] & g_ext_irq_vector)
//Note: Special handling of DB0/COMM_RECV to handle the db0/comm_recv
//starvation case.
//
//Reason: DB0(Quad Manager CME) and COMM_RECV(Sibling CME) are lower priority
//than the STOP related interrupts,
//and can stay pending for very long time(~ms scale) on systems with
//high frequency of STOP requests. This can then prevent PGPE from
//completing OCC directed IPC operations within the expected
//time bounds(< 8ms)
//
//Mechanism:
//1)In FIT: Every FIT tick, we check if DB0(on Quad manager)/COMM_RECV(on Sibling CME)
//is pending. If DB0(on Quad manager)/COMM_RECV(on Sibling CME) is seen pending for
//more than DB0_FIT_TICK_THRESHOLD/COMM_RECV_FIT_TICK_THRESHOLD FIT ticks,
//then we take action in UIH
//
//2)In UIH: We set priority level to IDX_PRTY_LVL_DB0/IDX_PRTY_LVL_COMM_RECVD, and mask
//everything except Priority 0(xstop, exceptions, etc). This then allows a
//pending DB0 to complete
if(g_db0_pending_fit_tick_count > DB0_FIT_TICK_THRESHOLD)
{
bFound = 1;
iPrtyLvl = IDX_PRTY_LVL_DB0;
break;
}
else if(g_comm_recv_pending_fit_tick_count > COMM_RECV_FIT_TICK_THRESHOLD)
{
bFound = 1;
iPrtyLvl = IDX_PRTY_LVL_COMM_RECVD;
break;
}
else if(g_intercme_in0_pending_tick_count > INTERCME_IN0_FIT_TICK_THRESHOLD)
{
bFound = 1;
iPrtyLvl = IDX_PRTY_LVL_INTERCME_IN0;
break;
}
else if(ext_irq_vectors_cme[iPrtyLvl][IDX_PRTY_VEC] & g_ext_irq_vector)
{
bFound = 1;
break;
}
}
while(++iPrtyLvl < (IOTA_NUM_EXT_IRQ_PRIORITIES - 1)); //No need to check DISABLED.
while(++iPrtyLvl < (IOTA_NUM_EXT_IRQ_PRIORITIES - 1)); //No need to check DISABLED.

// Only manipulate EIMR masks for task level prty levels.
// Let shared non-task IRQs (iPrtyLvl=0) be processed by
Expand All @@ -82,9 +122,38 @@ uint32_t iota_uih(void)
}

// 3. Write the new mask for this priority level.
out64(CME_LCL_EIMR, ext_irq_vectors_cme[iPrtyLvl][IDX_MASK_VEC] |
g_eimr_override);

//Note: Special handling of DB0/COMM_RECV to handle the db0/comm_recv
//starvation case.
//
//Reason: DB0(Quad Manager CME) and COMM_RECV(Sibling CME) are lower priority
//than the STOP related interrupts,
//and can stay pending for very long time(~ms scale) on systems with
//high frequency of STOP requests. This can then prevent PGPE from
//completing OCC directed IPC operations within the expected
//time bounds(< 8ms)
//
//Mechanism:
//1)In FIT: Every FIT tick, we check if DB0(on Quad manager)/COMM_RECV(on Sibling CME)
//is pending. If DB0(on Quad manager)/COMM_RECV(on Sibling CME) is seen pending for
//more than DB0_FIT_TICK_THRESHOLD/COMM_RECV_FIT_TICK_THRESHOLD FIT ticks,
//then we take action in UIH
//
//2)In UIH: We set priority level to IDX_PRTY_LVL_DB0/IDX_PRTY_LVL_COMM_RECVD, and mask
//everything except Priority 0(xstop, exceptions, etc). This then allows a
//pending DB0 to complete
if ((g_db0_pending_fit_tick_count > DB0_FIT_TICK_THRESHOLD) ||
(g_comm_recv_pending_fit_tick_count > COMM_RECV_FIT_TICK_THRESHOLD) ||
(g_intercme_in0_pending_tick_count > INTERCME_IN0_FIT_TICK_THRESHOLD))
{
PK_TRACE_INF("UIH: Starvation Detected. Overriding Mask!");
out64(CME_LCL_EIMR, (ext_irq_vectors_cme[0][IDX_MASK_VEC] |
g_eimr_override));
}
else
{
out64(CME_LCL_EIMR, ext_irq_vectors_cme[iPrtyLvl][IDX_MASK_VEC] |
g_eimr_override);
}
}
else
{
Expand Down
6 changes: 5 additions & 1 deletion import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2017,2018 */
/* COPYRIGHT 2017,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -46,6 +46,8 @@ CmeFitRecord G_cme_fit_record = {0, 0, 0, 0, 0xFFFFFFFF, 0};
#endif


void p9_cme_pstate_db0_comm_recv_intercme_in0_pending_counter();

uint32_t G_CME_LCL_EINR = CME_LCL_EINR;
uint32_t G_CME_LCL_EISR = CME_LCL_EISR;
uint32_t G_CME_LCL_EISR_CLR = CME_LCL_EISR_CLR;
Expand Down Expand Up @@ -102,6 +104,8 @@ void fit_handler()
p9_cme_core_livelock_buster();
#endif

//Handle DB0/COMM_RECV/INTERCME_IN0 starvation case (count FIT ticks while pending)
p9_cme_pstate_db0_comm_recv_intercme_in0_pending_counter();
}
#endif //fit handler

Expand Down
15 changes: 14 additions & 1 deletion import/chips/p9/procedures/ppe_closed/cme/p9_cme_irq.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2015,2018 */
/* COPYRIGHT 2015,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -48,6 +48,19 @@

#include <stdint.h>

//CME_TSEL is set to 8, which means the FIT has a period of 1.04ms when
//Nest Freq is 2000MHz. Ideally, the FIT period should be calculated from
//the nest frequency, but nest frequency is NOT plumbed to CME and we
//don't need to be highly accurate here.
//Note: from the PGPE perspective, the latency of the DB0 operation depends
//on the amount of time DB0 is pending on the Quad Manager plus the time
//COMM_RECV is pending on the sibling. This is because the COMM_RECV interrupt
//is triggered by the DB0 handler on the quad manager. Therefore, we must set
//COMM_RECV_FIT_TICK_THRESHOLD to be smaller.
//NOTE(review): all three thresholds below are currently 1, so the COMM_RECV
//threshold is not actually smaller than the DB0 threshold - confirm intent.
//
//The thresholds are compared with '>' in iota_uih(), so a value of 1 means
//the override triggers once the interrupt has been seen pending on 2 FIT
//ticks (~2ms at the FIT period above).
#define DB0_FIT_TICK_THRESHOLD 1 //Threshold for DB0 pending count(2ms)
#define COMM_RECV_FIT_TICK_THRESHOLD 1 //Threshold for COMM_RECV pending count(2ms)
#define INTERCME_IN0_FIT_TICK_THRESHOLD 1 //Threshold for INTERCME_IN0 pending count(2ms)

// Priority Levels
#define IDX_PRTY_LVL_HIPRTY 0
#define IDX_PRTY_LVL_DB3 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2016,2018 */
/* COPYRIGHT 2016,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -43,17 +43,21 @@
//
extern CmePstateRecord G_cme_pstate_record;
extern CmeRecord G_cme_record;
extern uint32_t g_comm_recv_pending_fit_tick_count;
extern uint32_t g_intercme_in0_pending_tick_count;

//
//InterCME_IN0 handler
//
// Handler for the InterCME_IN0 interrupt.
// Clears the INTERCME_IN0 starvation counter and then runs the sibling-side
// DB0 processing.
void p9_cme_pstate_intercme_in0_irq_handler(void)
{
// The interrupt is being serviced now, so it is no longer starved: reset the
// FIT-tick pending count that iota_uih() compares against
// INTERCME_IN0_FIT_TICK_THRESHOLD.
g_intercme_in0_pending_tick_count = 0;
p9_cme_pstate_process_db0_sibling();
}

// Handler for the inter-CME message (COMM_RECV) interrupt.
// Clears the COMM_RECV starvation counter and then runs the sibling
// lock / inter-CME protocol in INTERCME_MSG_LOCK_WAIT_ON_RECV mode.
void p9_cme_pstate_intercme_msg_handler(void)
{
// The interrupt is being serviced now, so it is no longer starved: reset the
// FIT-tick pending count that iota_uih() compares against
// COMM_RECV_FIT_TICK_THRESHOLD.
g_comm_recv_pending_fit_tick_count = 0;
p9_cme_pstate_sibling_lock_and_intercme_protocol(INTERCME_MSG_LOCK_WAIT_ON_RECV);
}

Expand Down Expand Up @@ -119,6 +123,8 @@ void p9_cme_pstate_process_db0_sibling()
//Unmask EIMR[OCC_HEARTBEAT_LOST/4]
g_eimr_override &= ~BIT64(4);

out32(G_CME_LCL_FLAGS_OR, BIT32(CME_FLAGS_DB0_COMM_RECV_STARVATION_CNT_ENABLED));//Set Starvation Count enabled

//Clear Core GPMMR RESET_STATE_INDICATOR bit to show pstates have started
CME_PUTSCOM(PPM_GPMMR_CLR, G_cme_record.core_enabled, BIT64(15));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ extern CmePstateRecord G_cme_pstate_record;
extern cmeHeader_t* G_cmeHeader;
extern LocalPstateParmBlock* G_lppb;
extern uint8_t G_vdm_threshold_table[];
extern uint32_t g_db0_pending_fit_tick_count;
extern uint32_t g_comm_recv_pending_fit_tick_count;
extern uint32_t g_intercme_in0_pending_tick_count;
cppm_cmedb0_t G_dbData;


Expand All @@ -75,6 +78,7 @@ void p9_cme_pstate_db0_start();
void p9_cme_pstate_db0_glb_bcast();
void p9_cme_pstate_db0_clip_bcast();
void p9_cme_pstate_update();
void p9_cme_pstate_db0_comm_recv_intercme_in0_pending_counter();

//
//Doorbell0 interrupt handler
Expand Down Expand Up @@ -166,6 +170,67 @@ void p9_cme_pstate_db0_handler(void)
g_eimr_override &= ~BIT64(4);
}

//
//Doorbell0/Comm Recv pending counter(called every FIT tick)
//
void p9_cme_pstate_db0_comm_recv_intercme_in0_pending_counter()
{
    // Starvation detection for DB0 / COMM_RECV / INTERCME_IN0.
    //
    // Why: DB0 (on the Quad Manager CME) and COMM_RECV (on the Sibling CME)
    // have lower priority than the STOP related interrupts and can stay
    // pending for a long time (~ms scale) on systems with a high frequency
    // of STOP requests. That can prevent PGPE from completing OCC directed
    // IPC operations within the expected time bounds (< 8ms).
    //
    // How:
    //  1) Here (called every FIT tick): if the interrupt of interest is
    //     pending in the EISR, bump its pending-tick counter.
    //  2) In the UIH: once a counter exceeds its *_FIT_TICK_THRESHOLD, the
    //     priority level is forced to that interrupt's level and everything
    //     except priority 0 (xstop, exceptions, etc) is masked so the
    //     pending interrupt can complete.
    //
    // The counters are reset to 0 by the respective interrupt handlers.
    const uint32_t lcl_flags = in32(G_CME_LCL_FLAGS);

    // Counting is armed only once the starvation-count flag has been set
    // in the CME flags register.
    if (!(lcl_flags & BIT32(CME_FLAGS_DB0_COMM_RECV_STARVATION_CNT_ENABLED)))
    {
        return;
    }

    if (G_cme_pstate_record.qmFlag)
    {
        // Quad Manager: watch DB0. EISR bit 36 is checked when core 0 is
        // good, otherwise bit 37 (presumably the other core's DB0).
        const int core0_good = (lcl_flags & BIT32(CME_FLAGS_CORE0_GOOD)) != 0;

        if (in32_sh(CME_LCL_EISR) & (core0_good ? BIT64SH(36) : BIT64SH(37)))
        {
            g_db0_pending_fit_tick_count++;
        }

        return;
    }

    // Sibling: watch EISR bit 29 (counted as COMM_RECV) and EISR bit 7
    // (counted as INTERCME_IN0). The EISR is sampled separately for each
    // check.
    if (in32(CME_LCL_EISR) & BIT32(29))
    {
        g_comm_recv_pending_fit_tick_count++;
    }

    if (in32(CME_LCL_EISR) & BIT32(7))
    {
        g_intercme_in0_pending_tick_count++;
    }
}

//
//Doorbell3 interrupt handler
//
Expand Down Expand Up @@ -660,6 +725,9 @@ void p9_cme_pstate_process_db0()
G_cme_pstate_record.updateAnalogError = 0;
uint64_t scom_data;

//Clear out g_db0_pending_fit_tick_count (DB0 is being serviced, so it is no longer starved)
g_db0_pending_fit_tick_count = 0;

PK_TRACE_INF("PSTATE: Process DB0 Enter");

//Clear EISR and read DB0 register
Expand Down Expand Up @@ -856,7 +924,7 @@ inline void p9_cme_pstate_register()
}
}

PK_TRACE_INF("PSTATE: Sib Register MsgCnt=%d", msgCnt);
PK_TRACE_DBG("PSTATE: Sib Register MsgCnt=%d", msgCnt);
}
}
}
Expand Down Expand Up @@ -894,6 +962,7 @@ void p9_cme_pstate_db0_start()
ack = MSGID_PCB_TYPE4_ACK_PSTATE_PROTO_ACK;

out32(G_CME_LCL_FLAGS_OR, BIT32(24));//Set Pstates Enabled
out32(G_CME_LCL_FLAGS_OR, BIT32(CME_FLAGS_DB0_COMM_RECV_STARVATION_CNT_ENABLED));//Set Starvation Count enabled

//Enable PMCR Interrupts (for good cores) when this task is done
g_eimr_override &= ~(uint64_t)(G_cme_record.core_enabled << SHIFT64(35));
Expand Down Expand Up @@ -1035,7 +1104,7 @@ inline void p9_cme_pstate_db0_pmsr_updt()
//Set Core GPMMR RESET_STATE_INDICATOR bit to show pstates have stopped
CME_PUTSCOM(PPM_GPMMR_OR, G_cme_record.core_enabled, BIT64(15));

PK_TRACE_INF("PSTATE: DB0 Safe Mode Exit");
PK_TRACE_INF("PSTATE: DB0 PMSR Updt Exit");
}

void p9_cme_pstate_notify_sib(INTERCME_DIRECT_INTF intf)
Expand All @@ -1058,7 +1127,7 @@ inline void p9_cme_pstate_freq_update(uint32_t cme_flags)
else
{
PK_TRACE_INF("PSTATE: Freq Updt Enter");
PK_TRACE_INF("PSTATE: Dpll0=0x%x", G_lppb->dpll_pstate0_value);
PK_TRACE_DBG("PSTATE: Dpll0=0x%x", G_lppb->dpll_pstate0_value);

//Adjust DPLL
qppm_dpll_freq_t dpllFreq;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2542,7 +2542,7 @@ void p9_pgpe_pstate_wov_init()
G_pgpe_pstate_record.wov.avg_freq_gt_target_freq = 0;
G_pgpe_pstate_record.wov.freq_loss_tenths_gt_max_droop_tenths = 0;
G_pgpe_pstate_record.wov.status = WOV_DISABLED;
G_pgpe_pstate_record.wov.info = 0xdeadbeef;
G_pgpe_pstate_record.wov.info = 0xdeadde04;
}

//
Expand Down

0 comments on commit 1096b33

Please sign in to comment.