Skip to content

Commit

Permalink
Fixed several small bugs found via code review
Browse files Browse the repository at this point in the history
CQ:SW474830
Change-Id: I1ff22805b9bb3facf68d4d62ea66de169be9b98b
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/83213
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
  • Loading branch information
Matthickman14 authored and dcrowell77 committed Sep 19, 2019
1 parent 08501bc commit b0cd81c
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 97 deletions.
4 changes: 2 additions & 2 deletions src/include/usr/isteps/nvdimm/nvdimm.H
Expand Up @@ -57,7 +57,7 @@ enum nvdimm_err_status
* @param[in] i_nvdimmList - list of nvdimm targets
*
**/
errlHndl_t nvdimm_restore(TARGETING::TargetHandleList &i_nvdimmList);
void nvdimm_restore(TARGETING::TargetHandleList &i_nvdimmList);


/**
Expand Down Expand Up @@ -456,7 +456,7 @@ void nvdimmAddPage4Regs(TARGETING::Target *i_nvdimm, errlHndl_t& io_err);
* @return errlHndl_t - nullptr if successful, otherwise a pointer to
* the error log.
*/
errlHndl_t nvdimm_init(TARGETING::Target *i_nvdimm);
void nvdimm_init(TARGETING::Target *i_nvdimm);

}

Expand Down
2 changes: 2 additions & 0 deletions src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H
Expand Up @@ -106,6 +106,7 @@ enum nvdimmModuleId
NVDIMM_NVM_HEALTH_CHECK = 0x38, // Health check on the NVM (non-volatile memory)/flash
NVDIMM_WAIT_OPER_OPS_COMPLETE = 0x39,
NVDIMM_COMPARE_CKSUM = 0x3A,
NVDIMM_CHECK_FW_SLOT = 0x3B,
};

/**
Expand Down Expand Up @@ -196,6 +197,7 @@ enum nvdimmReasonCode
NVDIMM_NVM_HEALTH_CHECK_FAILED = NVDIMM_COMP_ID | 0x4D, // !< An NVM health check on the NVDIMM failed
NVDIMM_VENDOR_LOG_TIMEOUT = NVDIMM_COMP_ID | 0x4E, // Vendor log for FFDC timeout
NVDIMM_VENDOR_LOG_CKSUM_FAILED = NVDIMM_COMP_ID | 0x4F, // Vendor log for FFDC checksum fail
NVDIMM_INVALID_FW_SLOT = NVDIMM_COMP_ID | 0x50,
};

enum UserDetailsTypes
Expand Down
93 changes: 71 additions & 22 deletions src/usr/isteps/nvdimm/nvdimm.C
Expand Up @@ -155,6 +155,9 @@ static constexpr uint8_t MSBIT_SET_MASK = 0x80;
static constexpr uint8_t MSBIT_CLR_MASK = 0x7F;
static constexpr uint8_t OPERATION_SLEEP_SECONDS = 0x1;

// Bit mask for checking the fw slot running
static constexpr uint8_t RUNNING_FW_SLOT = 0xF0;

#ifndef __HOSTBOOT_RUNTIME
// Warning thresholds
static constexpr uint8_t THRESHOLD_ES_LIFETIME = 0x07; // 7%
Expand Down Expand Up @@ -1675,7 +1678,7 @@ errlHndl_t nvdimmEpowSetup(TargetHandleList &i_nvdimmList)
* @param[in] i_nvdimmList - list of nvdimm targets
*
*/
errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)
void nvdimm_restore(TargetHandleList &i_nvdimmList)
{
TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_restore()");

Expand Down Expand Up @@ -1783,15 +1786,10 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)
if (l_err)
{
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_restore() nvdimm[%X] failed during health status check", get_huid(l_nvdimm));
if (l_exit)
{
errlCommit( l_err, NVDIMM_COMP_ID );
}
else
errlCommit( l_err, NVDIMM_COMP_ID );
if (!l_exit)
{
// Redundant check with external err bugged
errlCommit( l_err, NVDIMM_COMP_ID );
return l_err;
break;
}
}

Expand All @@ -1814,7 +1812,6 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)

}while(0);

// Return err not being handled, temp commit:
if (l_err)
{
errlCommit(l_err, NVDIMM_COMP_ID);
Expand All @@ -1832,7 +1829,6 @@ errlHndl_t nvdimm_restore(TargetHandleList &i_nvdimmList)
}

TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_restore()");
return l_err;
}

/**
Expand Down Expand Up @@ -1935,7 +1931,7 @@ errlHndl_t nvdimm_factory_reset(Target *i_nvdimm)
* @param[in] i_nvdimm - nvdimm target
*
*/
errlHndl_t nvdimm_init(Target *i_nvdimm)
void nvdimm_init(Target *i_nvdimm)
{
TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimm_init() nvdimm[%X]",
get_huid(i_nvdimm));
Expand Down Expand Up @@ -1986,6 +1982,52 @@ errlHndl_t nvdimm_init(Target *i_nvdimm)
break;
}

// Check if the firmware slot is 0
l_err = nvdimmReadReg ( i_nvdimm, FW_SLOT_INFO, l_data);

if (l_err)
{
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], failed to read slot info",
get_huid(i_nvdimm));
break;
}

if (!(l_data & RUNNING_FW_SLOT))
{
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimm_init() nvdimm[%X], running on fw slot 0",
get_huid(i_nvdimm));
/*@
*@errortype
*@reasoncode NVDIMM_INVALID_FW_SLOT
*@severity ERRORLOG_SEV_PREDICTIVE
*@moduleid NVDIMM_CHECK_FW_SLOT
*@userdata1[0:31] Related ops (0xff = NA)
*@userdata1[32:63] Target Huid
*@userdata2 <UNUSED>
*@devdesc Encountered error when checking the firmware slot running
* on NVDIMM. Firmware is running on slot 0 instead of 1
*@custdesc NVDIMM incorrect firmware slot
*/
l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
NVDIMM_CHECK_FW_SLOT,
NVDIMM_INVALID_FW_SLOT,
NVDIMM_SET_USER_DATA_1(l_data, get_huid(i_nvdimm)),
0x0,
ERRORLOG::ErrlEntry::NO_SW_CALLOUT );

l_err->collectTrace( NVDIMM_COMP_NAME );

// Add callout of nvdimm with no deconfig/gard
l_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL);

errlCommit(l_err, NVDIMM_COMP_ID);
}

// Get the timeout values for the major ops at init
l_err = nvdimmGetTimeoutVal(i_nvdimm);
if (l_err)
Expand Down Expand Up @@ -2023,8 +2065,12 @@ errlHndl_t nvdimm_init(Target *i_nvdimm)
*@userdata1[0:31] Related ops (0xff = NA)
*@userdata1[32:63] Target Huid
*@userdata2 <UNUSED>
*@devdesc Encountered error erasing previously stored data image
* on NVDIMM. Likely due to timeout and/or controller error
*@devdesc NO_RESET_N: The NVDIMM experienced a power loss, but no CSAVE
* was triggered since the NVDIMM did not detect an asserted
* RESET_N. If there is a prior predicitve log for OCC in safe
* mode, than this would be the reason for NO_RESET_N. Otherwise
* there could be a problem with the RESET_N signal between proc
* and NVDIMM.
*@custdesc NVDIMM error erasing data image
*/
l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
Expand All @@ -2040,9 +2086,10 @@ errlHndl_t nvdimm_init(Target *i_nvdimm)
// Failure to erase could mean internal NV controller error and/or
// HW error on nand flash. NVDIMM will lose persistency if failed to
// erase nand flash
l_err->addPartCallout( i_nvdimm,
HWAS::NV_CONTROLLER_PART_TYPE,
HWAS::SRCI_PRIORITY_LOW);
l_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL);

// Collect register data for FFDC Traces
nvdimmTraceRegs ( i_nvdimm, l_RegInfo );
Expand Down Expand Up @@ -2131,14 +2178,20 @@ errlHndl_t nvdimm_init(Target *i_nvdimm)
HWAS::SRCI_PRIORITY_HIGH,
HWAS::DECONFIG,
HWAS::GARD_Fatal);
break;
}
else
{
// Callout dimm without gard if image is valid
l_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL);

// Set ATTR_NV_STATUS_FLAG to partial working as data may persist
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
errlCommit(l_err, NVDIMM_COMP_ID);
}
break;
}

// Check Health Status Registers
Expand All @@ -2158,14 +2211,10 @@ errlHndl_t nvdimm_init(Target *i_nvdimm)
TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimm_init() nvdimm[%X]",
get_huid(i_nvdimm));

// Return err not being handled, temp commit:
if (l_err)
{
errlCommit(l_err, NVDIMM_COMP_ID);
}


return l_err;
}


Expand Down
2 changes: 1 addition & 1 deletion src/usr/isteps/nvdimm/nvdimm.H
Expand Up @@ -366,7 +366,7 @@ enum i2c_out_values : uint8_t
ARM_ERROR = 0X02,
RSTR_ERROR = 0x02,
SAVE_ERROR = 0x02,
ARM_CLEAR = 0x20,
ARM_CLEAR_ALL = 0x3C,
};

// Timeout-related enum
Expand Down
23 changes: 11 additions & 12 deletions src/usr/isteps/nvdimm/nvdimmErrorLog.C
Expand Up @@ -375,13 +375,13 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
if(!l_continue)
{
// Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR);

// Callout dimm and gard
// Callout dimm without deconfig or gard
o_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
HWAS::NO_DECONFIG,
HWAS::GARD_Fatal);
HWAS::GARD_NULL);
}
else
{
Expand All @@ -392,11 +392,11 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
errlCommit( l_err, NVDIMM_COMP_ID );
}

// Callout the dimm but do not deconfig or gard
// Callout and gard the dimm
o_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
HWAS::SRCI_PRIORITY_HIGH,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL);
HWAS::GARD_Fatal);
}

break;
Expand All @@ -412,7 +412,7 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::GARD_NULL);

// Set ATTR_NV_STATUS_FLAG to partially working as data may persist despite errors
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR);

break;
}
Expand Down Expand Up @@ -557,7 +557,7 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
if(!l_continue)
{
// Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR);

// Callout dimm but do not deconfig or gard
o_err->addHwCallout( i_nvdimm,
Expand All @@ -579,7 +579,6 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::DECONFIG,
HWAS::GARD_Fatal);
}

break;
}

Expand All @@ -599,7 +598,7 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::GARD_NULL);

// Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR);

break;
}
Expand Down Expand Up @@ -699,7 +698,7 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
if(!l_continue)
{
// Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR);
}
else
{
Expand Down Expand Up @@ -729,7 +728,7 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::GARD_NULL);

// Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR);

break;
}
Expand Down

0 comments on commit b0cd81c

Please sign in to comment.