Skip to content

Commit

Permalink
PFET: detect bad pfet through cme sgpe plumbing
Browse files Browse the repository at this point in the history
- checks for PFET Sense enable/disable ever being the same
- check at STOP power on and power off
- add FIT routine to perform check upon each interval
- if checks fail, mark the core in a CME scratch reg and halt
- CME halt is noticed by SGPE.  SGPE checks the CME scratch reg to see
  whether either core is marked, then sets OCC LFIR[61] to identify the PFET issue.
- Use CPMMR bit5:6 instead of scratch1 reg for pfet failure bits

OCC LFIR bit will be used by p9_pm_callout to log PFET related
  errors by core. (not in this commit).

Change-Id: I86a06ad951cbe879f06ace10cd40a0a484454f23
CQ:SW508755
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/104675
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: PPE CI <ppe-ci+hostboot@us.ibm.com>
Tested-by: Hostboot CI <hostboot-ci+hostboot@us.ibm.com>
Reviewed-by: Gregory S Still <stillgs@us.ibm.com>
Reviewed-by: RANGANATHPRASAD G. BRAHMASAMUDRA <prasadbgr@in.ibm.com>
Reviewed-by: NAREN A DEVAIAH <naren.devaiah@in.ibm.com>
  • Loading branch information
davidduyue authored and op-jenkins committed Oct 24, 2020
1 parent 071b078 commit bc04b3a
Show file tree
Hide file tree
Showing 15 changed files with 187 additions and 48 deletions.
4 changes: 4 additions & 0 deletions import/chips/p9/procedures/hwp/lib/p9_pm_hcd_flags.h
Expand Up @@ -102,6 +102,8 @@ enum PM_GPE_OCC_SCRATCH2_DEFS
PGPE_SAFE_MODE_ERROR = 14,
PM_DEBUG_HALT_ENABLE = 15,
CORE_THROTTLE_CONTINUOUS_CHANGE_ENABLE = 16,
CME_PFET_DELAY_TO_TIMEOUT = 20,
CME_PFET_TIMEOUT_INJECTION = 21,
PGPE_OP_TRACE_DISABLE = 24,
PGPE_OP_TRACE_MEM_MODE = 25

Expand Down Expand Up @@ -130,6 +132,8 @@ enum PM_CME_FLAGS_DEFS
CME_FLAGS_SAFE_MODE = 16,
CME_FLAGS_PSTATES_SUSPENDED = 17,
CME_FLAGS_DB0_COMM_RECV_STARVATION_CNT_ENABLED = 18,
CME_FLAGS_PFET_FIT_INJECTION = 20,
CME_FLAGS_PFET_TIMEOUT_INJECTION = 21,
CME_FLAGS_SPWU_CHECK_ENABLE = 22,
CME_FLAGS_BLOCK_ENTRY_STOP11 = 23,
CME_FLAGS_PSTATES_ENABLED = 24,
Expand Down
12 changes: 6 additions & 6 deletions import/chips/p9/procedures/ppe_closed/cme/cme_panic_codes.h
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2016,2018 */
/* COPYRIGHT 2016,2020 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -57,7 +57,7 @@ CME_PVREF_FAIL_DETECTED = 0x1c0d,
CME_OCC_HEARTBEAT_LOST_DETECTED = 0x1c1c,
CME_CORE_CHECKSTOP_DETECTED = 0x1c1d,
CME_BAD_DD_LEVEL = 0x1c1e,
//_UNUSED_1c1f = 0x1c1f,
CME_BAD_PFET = 0x1c1f,
//
CME_STOP_EXIT_PHANTOM_WAKEUP = 0x1d00, // Stop Error
CME_STOP_EXIT_BCE_SCOM_FAILED = 0x1d01,
Expand All @@ -66,14 +66,14 @@ CME_STOP_EXIT_STARTCLK_FAILED = 0x1d03,
CME_STOP_EXIT_STARTCLK_XSTOP_ERROR = 0x1d04,
CME_STOP_EXIT_SCOM_RES_XSTOP_ERROR = 0x1d05,
CME_STOP_SPWU_PROTOCOL_ERROR = 0x1d06,
//_UNUSED_1d07 = 0x1d07,
CME_PFET_ENTRY_SENSE_TIMEOUT = 0x1d07,
CME_STOP_ENTRY_STOPCLK_FAILED = 0x1d08,
CME_STOP_ENTRY_XSTOP_ERROR = 0x1d09, // NDD1
CME_STOP_ENTRY_TRAP_INJECT = 0x1d0a,
CME_STOP_ENTRY_HANDOFF_LESSTHAN5 = 0x1d0d,
//_UNUSED_1d1c = 0x1d1c,
//_UNUSED_1d1d = 0x1d1d,
//_UNUSED_1d1e = 0x1d1e,
CME_PFET_EXIT_SENSE_TIMEOUT = 0x1d1c,
CME_PFET_EXIT_SENSE_TIMEOUT_INJECT = 0x1d1d,
CME_PFET_EXIT_SENSE_FIT_INJECT = 0x1d1e,
//_UNUSED_1d1f = 0x1d1f,

CME_PSTATE_RESCLK_ENABLED_AT_BOOT = 0x1e00, // Pstate Error
Expand Down
3 changes: 2 additions & 1 deletion import/chips/p9/procedures/ppe_closed/cme/p9_cme.h
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2017,2018 */
/* COPYRIGHT 2017,2020 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -42,6 +42,7 @@ extern uint32_t G_CME_LCL_FLAGS;
extern uint32_t G_CME_LCL_FLAGS_CLR;
extern uint32_t G_CME_LCL_FLAGS_OR;
extern uint32_t G_CME_LCL_SRTCH0;
extern uint32_t G_CME_LCL_SRTCH1;
extern uint32_t G_CME_LCL_TSEL;
extern uint32_t G_CME_LCL_TBR;
extern uint32_t G_CME_LCL_DBG;
Expand Down
42 changes: 36 additions & 6 deletions import/chips/p9/procedures/ppe_closed/cme/p9_cme_iota_main.c
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2017,2019 */
/* COPYRIGHT 2017,2020 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -61,6 +61,7 @@ uint32_t G_CME_LCL_FLAGS = CME_LCL_FLAGS;
uint32_t G_CME_LCL_FLAGS_CLR = CME_LCL_FLAGS_CLR;
uint32_t G_CME_LCL_FLAGS_OR = CME_LCL_FLAGS_OR;
uint32_t G_CME_LCL_SRTCH0 = CME_LCL_SRTCH0;
uint32_t G_CME_LCL_SRTCH1 = CME_LCL_SRTCH1;
uint32_t G_CME_LCL_TSEL = CME_LCL_TSEL;
uint32_t G_CME_LCL_TBR = CME_LCL_TBR;
uint32_t G_CME_LCL_DBG = CME_LCL_DBG;
Expand Down Expand Up @@ -88,6 +89,35 @@ void fit_handler()
data64_t scom_data;
scom_data.value = 0;

if( in32(G_CME_LCL_FLAGS) & BIT32(CME_FLAGS_PFET_FIT_INJECTION) )
{
// Inject Core 0 as bad
CME_PUTSCOM_NOP(CPPM_CPMMR_OR, CME_MASK_C0, BIT64(5));
PK_PANIC(CME_PFET_EXIT_SENSE_FIT_INJECT);
}

CME_GETSCOM(PPM_PFSNS, CME_MASK_C0, scom_data);

if( ( ( ( scom_data.words.upper & BIT32(0) ) == 1 ) &&
( ( scom_data.words.upper & BIT32(1) ) == 1 ) ) ||
( ( ( scom_data.words.upper & BIT32(0) ) == 0 ) &&
( ( scom_data.words.upper & BIT32(1) ) == 0 ) ) )
{
CME_PUTSCOM_NOP(CPPM_CPMMR_OR, CME_MASK_C0, BIT64(5));
PK_PANIC(CME_BAD_PFET);
}

CME_GETSCOM(PPM_PFSNS, CME_MASK_C1, scom_data);

if( ( ( ( scom_data.words.upper & BIT32(0) ) == 1 ) &&
( ( scom_data.words.upper & BIT32(1) ) == 1 ) ) ||
( ( ( scom_data.words.upper & BIT32(0) ) == 0 ) &&
( ( scom_data.words.upper & BIT32(1) ) == 0 ) ) )
{
CME_PUTSCOM_NOP(CPPM_CPMMR_OR, CME_MASK_C1, BIT64(6));
PK_PANIC(CME_BAD_PFET);
}

CME_GETSCOM_OR(CPPM_CSAR, CME_MASK_BC, scom_data.value);

if(BIT32(CPPM_CSAR_FIT_HCODE_ERROR_INJECT) & scom_data.words.upper)
Expand All @@ -98,7 +128,7 @@ void fit_handler()


mtspr(SPRN_TSR, TSR_FIS);
PK_TRACE("FIT Timer Handler");
//PK_TRACE("FIT Timer Handler");

#if !DISABLE_PERIODIC_CORE_QUIESCE && (NIMBUS_DD_LEVEL == 20 || NIMBUS_DD_LEVEL == 21 || CUMULUS_DD_LEVEL == 10)
p9_cme_core_livelock_buster();
Expand Down Expand Up @@ -198,7 +228,7 @@ int main()

pk_trace_set_freq(trace_timebase);

PK_TRACE(">CME MAIN");
// PK_TRACE(">CME MAIN");

// Clear SPRG0
ppe42_app_ctx_set(0);
Expand All @@ -212,20 +242,20 @@ int main()
out32(G_CME_LCL_LMCR_OR, BITS32(8, 2));
#endif

PK_TRACE("Set Watch Dog Timer Rate to 6 and FIT Timer Rate to 8");
// PK_TRACE("Set Watch Dog Timer Rate to 6 and FIT Timer Rate to 8");
out32(G_CME_LCL_TSEL, (BITS32(1, 2) | BIT32(4)));

#if (!DISABLE_CME_FIT_TIMER || ENABLE_CME_DEC_TIMER)

uint32_t TCR_VAL = 0;

#if !DISABLE_CME_FIT_TIMER
PK_TRACE("Enable FIT Timer");
// PK_TRACE("Enable FIT Timer");
TCR_VAL |= TCR_FIE;
#endif

#if ENABLE_CME_DEC_TIMER
PK_TRACE("Enable DEC Timer");
// PK_TRACE("Enable DEC Timer");
TCR_VAL |= TCR_DIE;
#endif

Expand Down
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2016,2019 */
/* COPYRIGHT 2016,2020 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -63,7 +63,7 @@ void p9_cme_pstate_intercme_msg_handler(void)

void p9_cme_pstate_sibling_lock_and_intercme_protocol(INTERCME_MSG_LOCK_ACTION intercme_msg_lock_action)
{
PK_TRACE_INF("SIBL: Enter");
PK_TRACE_DBG("SIBL: Enter");
uint32_t msg;

if (intercme_msg_lock_action == INTERCME_MSG_LOCK_WAIT_ON_RECV)
Expand Down Expand Up @@ -101,7 +101,7 @@ void p9_cme_pstate_process_db0_sibling()
//writes same value for both cores
CME_GETSCOM(CPPM_CMEDB0, G_cme_pstate_record.firstGoodCoreMask, dbData.value);

PK_TRACE_INF("INTER0: Enter");
PK_TRACE_DBG("INTER0: Enter");

dbQuadInfo = (dbData.value >> (in32(G_CME_LCL_SRTCH0) &
(BITS32(CME_SCRATCH_LOCAL_PSTATE_IDX_START, CME_SCRATCH_LOCAL_PSTATE_IDX_LENGTH)
Expand Down
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2016,2019 */
/* COPYRIGHT 2016,2020 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -368,7 +368,7 @@ void p9_cme_pstate_db3_handler(void)
}
else if (db3.fields.cme_message_numbern == MSGID_DB3_CLIP_BROADCAST)
{
PK_TRACE_INF("PSTATE: DB3 Clip Enter");
PK_TRACE_DBG("PSTATE: DB3 Clip Enter");

uint32_t dbQuadInfo, dbBit8_15;
cppm_cmedb0_t dbData;
Expand Down Expand Up @@ -732,7 +732,7 @@ void p9_cme_pstate_process_db0()
//Clear out db0_pending_tick_count
g_db0_pending_fit_tick_count = 0;

PK_TRACE_INF("PSTATE: Process DB0 Enter");
PK_TRACE_DBG("PSTATE: Process DB0 Enter");

//Clear EISR and read DB0 register
out32_sh(CME_LCL_EISR_CLR, BITS64SH(36, 2));
Expand Down Expand Up @@ -942,7 +942,7 @@ inline void p9_cme_pstate_register()
//
void p9_cme_pstate_db0_start()
{
PK_TRACE_INF("PSTATE: DB0 Start Enter");
PK_TRACE_DBG("PSTATE: DB0 Start Enter");

p9_cme_pstate_update();
uint32_t ack;
Expand Down Expand Up @@ -989,7 +989,7 @@ void p9_cme_pstate_db0_start()
//
void p9_cme_pstate_db0_glb_bcast()
{
PK_TRACE_INF("PSTATE: DB0 GlbBcast Enter");
PK_TRACE_DBG("PSTATE: DB0 GlbBcast Enter");

p9_cme_pstate_update();
uint32_t ack;
Expand All @@ -1015,7 +1015,7 @@ void p9_cme_pstate_db0_glb_bcast()
//
inline void p9_cme_pstate_db0_stop()
{
PK_TRACE_INF("PSTATE: DB0 Stop Enter");
PK_TRACE_DBG("PSTATE: DB0 Stop Enter");

out32(G_CME_LCL_FLAGS_CLR, BIT32(24));//Set Pstates Disabled

Expand Down Expand Up @@ -1045,7 +1045,7 @@ inline void p9_cme_pstate_db0_stop()
void p9_cme_pstate_db0_clip_bcast()
{

PK_TRACE_INF("PSTATE: DB0 Clip Enter");
PK_TRACE_DBG("PSTATE: DB0 Clip Enter");


uint32_t dbBit8_15 = (G_dbData.value & BITS64(8, 8)) >> SHIFT64(15);
Expand Down Expand Up @@ -1082,7 +1082,7 @@ void p9_cme_pstate_db0_clip_bcast()

inline void p9_cme_pstate_db0_pmsr_updt()
{
PK_TRACE_INF("PSTATE: DB0 Pmsr Updt Enter");
PK_TRACE_DBG("PSTATE: DB0 Pmsr Updt Enter");

uint32_t dbBit8_15 = (G_dbData.value & BITS64(8, 8)) >> SHIFT64(15);

Expand Down Expand Up @@ -1129,7 +1129,7 @@ inline void p9_cme_pstate_freq_update(uint32_t cme_flags)
}
else
{
PK_TRACE_INF("PSTATE: Freq Updt Enter");
PK_TRACE_DBG("PSTATE: Freq Updt Enter");
PK_TRACE_DBG("PSTATE: Dpll0=0x%x", G_lppb->dpll_pstate0_value);

//Adjust DPLL
Expand Down Expand Up @@ -1237,7 +1237,7 @@ void p9_cme_pstate_update()
{
PkMachineContext ctx;

PK_TRACE_INF("PSTATE: Pstate Updt Enter");
PK_TRACE_DBG("PSTATE: Pstate Updt Enter");

G_cme_pstate_record.nextPstate = (G_dbData.value >> (in32(G_CME_LCL_SRTCH0) &
(BITS32(CME_SCRATCH_LOCAL_PSTATE_IDX_START,
Expand Down
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2016,2018 */
/* COPYRIGHT 2016,2020 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -94,7 +94,7 @@ void p9_cme_pstate_pmcr_handler(void)
//
void p9_cme_init_done()
{
PK_TRACE_INF("CME INIT DONE: Enter");
PK_TRACE_DBG("CME INIT DONE: Enter");
uint32_t msg;

// Synchronization between QM and Sibling
Expand Down
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2015,2018 */
/* COPYRIGHT 2015,2020 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -179,8 +179,8 @@ void bce_irr_run()

if (l_data32 != G_bce_irr.data.word)
{
PKTRACE("miscompare between bce irr read[%x] and cme shadow copy[%x]",
l_data32, G_bce_irr.data.word);
PK_TRACE_DBG("miscompare between bce irr read[%x] and cme shadow copy[%x]",
l_data32, G_bce_irr.data.word);
pk_halt();
}

Expand Down
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HCODE Project */
/* */
/* COPYRIGHT 2015,2019 */
/* COPYRIGHT 2015,2020 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -224,6 +224,8 @@ void p9_cme_pcbmux_savior_epilogue(uint32_t core)
void
p9_cme_stop_entry()
{
int i = 0;
uint32_t timeout = 0;
int catchup_ongoing = 0;
int entry_ongoing = 1;
uint8_t target_level = 0;
Expand Down Expand Up @@ -1441,17 +1443,47 @@ p9_cme_stop_entry()
PK_PANIC(CME_STOP_ENTRY_TRAP_INJECT);
}

//500Mhz gives 2ns per ppe cycle
//pfet or stop should finish within 1ms
//set delay to 20ns
#define PFET_DELAY 20
#define PFET_TIMEOUT 20000

timeout = PFET_TIMEOUT;

do
{
CME_GETSCOM_AND(PPM_PFSNS, core, scom_data.value);

timeout--;

if( !timeout )
{
CME_PUTSCOM_NOP(CPPM_CPMMR_OR, core, ((uint64_t)(core) << SHIFT64(6)));
//PK_TRACE_ERR("PFET SENSE TIMED OUT, HALT CME!");

// if( in32(G_CME_LCL_FLAGS) & BIT32(CME_FLAGS_PFET_DELAY_TO_TIMEOUT) )
// {
PK_PANIC(CME_PFET_ENTRY_SENSE_TIMEOUT);
// }
// else
// {
// break;
// }
}

for(i = 0; i < PFET_DELAY; i++)
{
asm volatile ("tw 0, 0, 0");
}
}
while(!(scom_data.words.upper & BIT32(1)));

PK_TRACE("Turn off force voff via PFCS[0-1]");
// vdd_pfet_force_state = 00 (Nop)
CME_PUTSCOM(PPM_PFCS_CLR, core, BITS64(0, 2));

PK_TRACE_INF("SE.4A: Core[%d] Powered Off", core);
PK_TRACE_DBG("SE.4A: Core[%d] Powered Off", core);

#endif

Expand Down

0 comments on commit bc04b3a

Please sign in to comment.