Skip to content

Commit

Permalink
Misc. fixes for PM Malf/Stop Recovery with CME injects
Browse files Browse the repository at this point in the history
- Auto Spl wkup fix in malf path (leave hyp marked bad cores alone)
- Move Malf Handling (OCC FLG2 [29]) enable from sgpe init to later in PM Init
- Enable PM callout tracing (debug only)
- FAPI_ASSERT_NOEXIT UE to Recoverable fix (avoid unwanted err logs)
- OCC Spl wkup fix (fix bad target usage)
- Fix ffdc reg address not being added to ffdc region

Change-Id: I1771e178c12bcc6a9ab03ac48c7ad01cafa68ca7
CQ: SW438835
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/63324
Reviewed-by: Prem Shanker Jha <premjha2@in.ibm.com>
Reviewed-by: RANGANATHPRASAD G. BRAHMASAMUDRA <prasadbgr@in.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: HWSV CI <hwsv-ci+hostboot@us.ibm.com>
Reviewed-by: Gregory S. Still <stillgs@us.ibm.com>
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/63331
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
  • Loading branch information
am10dolkar authored and crgeddes committed Aug 2, 2018
1 parent d175f43 commit 9787cfb
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 52 deletions.
9 changes: 5 additions & 4 deletions src/import/chips/p9/procedures/hwp/pm/p9_pm_callout.C
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ fapi2::ReturnCode CoreAction::updateCoreAndQuadConfigState()

l_ccsrData &= l_clearMask;

FAPI_DBG( "Clear Mask 0x%016llx CCSR 0x%016lx",
l_clearMask, l_ccsrData );
FAPI_DBG ( "Clear Mask 0x%016llx CCSR 0x%016lx",
l_clearMask, l_ccsrData );

FAPI_TRY( putScom( iv_procChipTgt, PU_OCB_OCI_CCSR_SCOM, l_ccsrData ),
"Failed To Write CCSR Register" );
Expand Down Expand Up @@ -262,7 +262,7 @@ fapi2::ReturnCode CoreAction :: init( )
"Failed To Read OCC Flag2 Register" );
l_scomData.extract<0, 32>( iv_deadCoreVect );

FAPI_DBG( "Phyp Dead Core Vector 0x%08lx", iv_deadCoreVect );
FAPI_DBG ( "Phyp Dead Core Vector 0x%08lx", iv_deadCoreVect );

fapi_try_exit:
return fapi2::current_err;
Expand Down Expand Up @@ -329,6 +329,7 @@ extern "C"
"Failed To Clear PCB Interrupts" );

l_coreActn.getDeadCoreVector( o_deadCores ); //retrieve Phyp generated dead core vector
FAPI_INF("Dead cores from PHYP: 0x%08x", o_deadCores);

for( uint8_t l_ffdcSecId = 0; l_ffdcSecId < MAX_FFDC_SUMMARY_SECTN_CNT;
l_ffdcSecId++ )
Expand Down Expand Up @@ -410,7 +411,7 @@ extern "C"
o_rasAction = PROC_CHIP_CALLOUT;
}

FAPI_DBG( "FFDC Summary Sectn Count 0x%08x", o_ffdcList.size() );
FAPI_INF( "FFDC Summary Sectn Count 0x%08x", o_ffdcList.size() );

fapi_try_exit:

Expand Down
31 changes: 28 additions & 3 deletions src/import/chips/p9/procedures/hwp/pm/p9_pm_init.C
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@
// Includes
// -----------------------------------------------------------------------------
#include <p9_pm_init.H>
#include <p9n2_misc_scom_addresses.H>

static const uint64_t PU_OCB_OCI_OCCFLG2_CLEAR = P9N2_PU_OCB_OCI_OCCFLG2_SCOM1;
static const uint64_t PU_OCB_OCI_OCCFLG2_SET = P9N2_PU_OCB_OCI_OCCFLG2_SCOM2;

// -----------------------------------------------------------------------------
// Function prototypes
Expand Down Expand Up @@ -123,6 +127,10 @@ fapi2::ReturnCode pm_init(
fapi2::ReturnCode l_rc;
fapi2::ATTR_PM_MALF_CYCLE_Type l_malfCycle =
fapi2::ENUM_ATTR_PM_MALF_CYCLE_INACTIVE;
fapi2::ATTR_PM_MALF_ALERT_ENABLE_Type malfAlertEnable =
fapi2::ENUM_ATTR_PM_MALF_ALERT_ENABLE_FALSE;
fapi2::buffer<uint64_t> l_data64 = 0;
const fapi2::Target<fapi2::TARGET_TYPE_SYSTEM> FAPI_SYSTEM;

// ************************************************************************
// Initialize Cores and Quads
Expand Down Expand Up @@ -250,10 +258,27 @@ fapi2::ReturnCode pm_init(
FAPI_TRY(l_rc, "ERROR: Failed to initialize OCC PPC405");
FAPI_TRY(p9_pm_glob_fir_trace(i_target, "After OCC PPC405 init"));

FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_PM_MALF_ALERT_ENABLE,
FAPI_SYSTEM,
malfAlertEnable),
"Error from FAPI_ATTR_GET for attribute ATTR_PM_MALF_ALERT_ENABLE");
// Set the Malf Alert Enabled policy to OCCFLG2 reg bit 29
l_data64.flush<0>().setBit<p9hcd::STOP_RECOVERY_TRIGGER_ENABLE>();

if (malfAlertEnable == fapi2::ENUM_ATTR_PM_MALF_ALERT_ENABLE_TRUE)
{
FAPI_TRY(fapi2::putScom(i_target, PU_OCB_OCI_OCCFLG2_SET, l_data64));
}
else
{
FAPI_TRY(fapi2::putScom(i_target, PU_OCB_OCI_OCCFLG2_CLEAR, l_data64));
}

FAPI_IMP ("Malf Alert Policy Enabled: %d", malfAlertEnable);

fapi_try_exit:

return fapi2::current_err;

}

fapi2::ReturnCode clear_occ_special_wakeups(
Expand All @@ -276,10 +301,10 @@ fapi2::ReturnCode clear_occ_special_wakeups(
l_ex_num));

FAPI_DBG("Clear OCC special wakeup on ex chiplet 0x%08X", l_ex_num);
FAPI_TRY(fapi2::getScom(i_target, EX_PPM_SPWKUP_OCC, l_data64),
FAPI_TRY(fapi2::getScom(l_ex_chplt, EX_PPM_SPWKUP_OCC, l_data64),
"ERROR: Failed to read OCC Spl wkup on EX 0x%08X", l_ex_num);
l_data64.clearBit<0>();
FAPI_TRY(fapi2::putScom(i_target, EX_PPM_SPWKUP_OCC, l_data64),
FAPI_TRY(fapi2::putScom(l_ex_chplt, EX_PPM_SPWKUP_OCC, l_data64),
"ERROR: Failed to clear OCC Spl wkup on EX 0x%08X", l_ex_num);
}

Expand Down
52 changes: 36 additions & 16 deletions src/import/chips/p9/procedures/hwp/pm/p9_pm_reset.C
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,6 @@ p9_pm_reset_psafe_update(const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>& i_ta
FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_SAFE_MODE_NOVDM_UPLIFT_MV, i_target, l_uplift_mv));
FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_EXTERNAL_VRM_STEPSIZE, FAPI_SYSTEM, l_ext_vrm_step_size_mv));
l_attr_safe_mode_mv += l_uplift_mv;

//Reset safe mode attributes
FAPI_TRY(FAPI_ATTR_SET(fapi2::ATTR_SAFE_MODE_FREQUENCY_MHZ, i_target, l_attr_reset_safe_mode_freq_mhz));
FAPI_TRY(FAPI_ATTR_SET(fapi2::ATTR_SAFE_MODE_VOLTAGE_MV, i_target, l_attr_reset_safe_mode_mv));
Expand Down Expand Up @@ -632,6 +631,18 @@ p9_pm_set_auto_spwkup(const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>& i_targe

FAPI_INF(">> p9_set_auto_spwkup");

fapi2::buffer <uint64_t> l_deadCoreVector;
uint8_t l_malfAlertActive = fapi2::ENUM_ATTR_PM_MALF_CYCLE_INACTIVE;
FAPI_TRY (FAPI_ATTR_GET (fapi2::ATTR_PM_MALF_CYCLE, i_target,
l_malfAlertActive));

if (l_malfAlertActive == fapi2::ENUM_ATTR_PM_MALF_CYCLE_ACTIVE)
{
FAPI_TRY( getScom (i_target, P9N2_PU_OCB_OCI_OCCFLG2_SCOM,
l_deadCoreVector),
"Failed to Read OCC Flag2 Register for PM Malf Dead Core Vector" );
}

// For each EX target
for (auto& l_ex_chplt : i_target.getChildren<fapi2::TARGET_TYPE_EX>
(fapi2::TARGET_STATE_FUNCTIONAL))
Expand All @@ -640,8 +651,6 @@ p9_pm_set_auto_spwkup(const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>& i_targe
fapi2::buffer<uint64_t> l_gpmmr;
fapi2::buffer<uint64_t> l_lmcr;
uint32_t l_bit;
fapi2::ATTR_PM_MALF_CYCLE_Type l_malfAlertActive =
fapi2::ENUM_ATTR_PM_MALF_CYCLE_INACTIVE;

fapi2::ATTR_CHIP_UNIT_POS_Type l_ex_num;
FAPI_TRY(FAPI_ATTR_GET( fapi2::ATTR_CHIP_UNIT_POS,
Expand All @@ -661,38 +670,49 @@ p9_pm_set_auto_spwkup(const fapi2::Target<fapi2::TARGET_TYPE_PROC_CHIP>& i_targe
FAPI_TRY(fapi2::getScom(l_core, C_PPM_GPMMR_SCOM, l_gpmmr),
"GetScom of GPMMR failed");

if (l_deadCoreVector.getBit(l_core_num))
{
// HYP could not spl. wkup. this core on a PM Malf HMI.
// Leave it that way and do not enable auto spl. wkup. on it
FAPI_INF ("==> Skip auto spl wkup enabled on HYP dead core %d", l_core_num);
continue; // move on to the next core
}

if (l_gpmmr.getBit<EQ_PPM_GPMMR_SPECIAL_WKUP_DONE>())
{
// Clear the auto special wake-up disable (eg enable it) for the core
// Clear the auto special wake-up disable (i.e. enable it) for the core.
l_bit = EQ_CME_SCOM_LMCR_C0_AUTO_SPECIAL_WAKEUP_DISABLE + (l_core_num % 2);
l_lmcr.flush<0>().setBit(l_bit);
FAPI_TRY(fapi2::putScom(l_ex_chplt, EX_CME_SCOM_LMCR_SCOM1, l_lmcr),
"PutScom of LMCR failed");
"PutScom of LMCR failed: core %d", l_core_num);
FAPI_INF ("==> Auto spl wakeup enabled for core %d", l_core_num);
}
else
{
FAPI_TRY (FAPI_ATTR_GET (fapi2::ATTR_PM_MALF_CYCLE, i_target,
l_malfAlertActive));

if (l_malfAlertActive == fapi2::ENUM_ATTR_PM_MALF_CYCLE_INACTIVE)
{
FAPI_ASSERT (false,
fapi2::PM_RESET_SPWKUP_DONE_ERROR()
fapi2::PM_RESET_SPWKUP_DONE_ERROR ()
.set_CORE_TARGET(l_core)
.set_GPMMR(l_gpmmr),
.set_GPMMR(l_gpmmr)
.set_MALF_ALERT_ACTIVE(l_malfAlertActive),
"Core expected to be in special wake-up is not "
"prior to setting auto special wake-up mode");
}
else
{
// It is possible that special wakeup had failed as we are in PM MALF path
// Log an info error and continue with the Reset flow
// In Malf Path, it is possible that special wakeup failed
// on cores above a bad CME. These cores should come in as
// bad via the PHYP bad core vector & PM Init is expected to
// come up without them (the CME catering to these).
// So, do not break the PM Reset/Recovery flow in this case
FAPI_ASSERT_NOEXIT ( false,
fapi2::PM_RESET_SPWKUP_DONE_ERROR()
fapi2::PM_RESET_SPWKUP_DONE_ERROR(fapi2::FAPI2_ERRL_SEV_RECOVERED)
.set_CORE_TARGET(l_core)
.set_GPMMR(l_gpmmr),
"Core expected to be in special wake-up is not "
"prior to setting auto special wake-up mode" );
.set_GPMMR(l_gpmmr)
.set_MALF_ALERT_ACTIVE(l_malfAlertActive),
"Core expected to be in special wake-up is not prior to setting"
" auto special wake-up mode. Ignored in PM recovery Path!");
}
}
}
Expand Down
25 changes: 0 additions & 25 deletions src/import/chips/p9/procedures/hwp/pm/p9_pm_stop_gpe_init.C
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,6 @@ fapi2::ReturnCode p9_pm_stop_gpe_init(

const char* PM_MODE_NAME_VAR; //Defines storage for PM_MODE_NAME
FAPI_INF("Executing p9_stop_gpe_init in mode %s", PM_MODE_NAME(i_mode));

fapi2::ATTR_PM_MALF_ALERT_ENABLE_Type malfAlertEnable =
fapi2::ENUM_ATTR_PM_MALF_ALERT_ENABLE_FALSE;
uint8_t fusedModeState = 0;
uint8_t coreQuiesceDis = 0;
uint8_t l_core_number = 0;
Expand All @@ -154,12 +151,6 @@ fapi2::ReturnCode p9_pm_stop_gpe_init(
coreQuiesceDis),
"Error from FAPI_ATTR_GET for attribute ATTR_SYSTEM_CORE_PERIODIC_QUIESCE_DISABLE");

FAPI_IMP ("reading ATTR_PM_MALF_ALERT_ENABLE");
FAPI_TRY(FAPI_ATTR_GET(fapi2::ATTR_PM_MALF_ALERT_ENABLE,
FAPI_SYSTEM,
malfAlertEnable),
"Error from FAPI_ATTR_GET for attribute ATTR_PM_MALF_ALERT_ENABLE");

// Check each core has a functional EX and EQ
auto l_functional_core_vector =
i_target.getChildren<fapi2::TARGET_TYPE_CORE>
Expand Down Expand Up @@ -321,22 +312,6 @@ fapi2::ReturnCode p9_pm_stop_gpe_init(
l_data64.flush<0>().setBit<p9hcd::OCCFLG2_SGPE_HCODE_STOP_REQ_ERR_INJ>();
FAPI_TRY(fapi2::putScom(i_target, PU_OCB_OCI_OCCFLG2_CLEAR, l_data64));

// Set the Malf Alert Enabled policy to OCCFLG2 reg bit 29
FAPI_IMP ("Malf Alert Policy Enabled: %d", malfAlertEnable);

l_data64.flush<0>().setBit<p9hcd::STOP_RECOVERY_TRIGGER_ENABLE>();

if (malfAlertEnable == fapi2::ENUM_ATTR_PM_MALF_ALERT_ENABLE_TRUE)
{
FAPI_TRY(fapi2::putScom(i_target, PU_OCB_OCI_OCCFLG2_SET, l_data64));
}
else
{
FAPI_TRY(fapi2::putScom(i_target, PU_OCB_OCI_OCCFLG2_CLEAR, l_data64));
}

FAPI_IMP ("Malf Alert Policy Set to OCC FLAG2 .. now init SGPE");

// Boot the STOP GPE
FAPI_TRY(stop_gpe_init(i_target), "ERROR: failed to initialize Stop GPE");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@
auto special wake-up mode
</description>
<ffdc>CORE_TARGET</ffdc>
<ffdc>DPLL_FREQ</ffdc>
<ffdc>GPMMR</ffdc>
<ffdc>MALF_ALERT_ACTIVE</ffdc>
<callout>
<procedure>CODE</procedure>
<priority>HIGH</priority>
Expand Down
7 changes: 4 additions & 3 deletions src/import/hwpf/fapi2/include/collect_reg_ffdc.H
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ ReturnCode collectRegisterAndAddressData(std::vector<T>& i_addresses, U& i_reade
FAPI_DBG("collectRegisterAndAddressData -> address count: 0x%lx", i_addresses.size());

T l_data = 0;
uint32_t l_addr = 0;

ReturnCode l_rc = FAPI2_RC_SUCCESS;

Expand All @@ -423,14 +424,14 @@ ReturnCode collectRegisterAndAddressData(std::vector<T>& i_addresses, U& i_reade
l_data = l_buf();
}

l_address = htobe32(l_address);
l_address = htobe64(l_address);
l_data = htobe64(l_data);
l_addr = l_address;

memcpy(o_pData, &l_address, sizeof(uint32_t));
memcpy(o_pData, &l_addr, sizeof(uint32_t));
o_pData += sizeof(uint32_t);

memcpy(o_pData, &l_data, sizeof(T));

o_pData += sizeof(T);

}
Expand Down

0 comments on commit 9787cfb

Please sign in to comment.