Skip to content

Commit

Permalink
Inform PHYP of NVDIMM protection by OCC
Browse files Browse the repository at this point in the history
The OCC is responsible for detecting the EPOW signal
and triggering the save operation on the NVDIMM.
Therefore, if the OCC is not running, we are unprotected
from a power-off event. PHYP needs to inform the LPARs
using the NV (non-volatile) memory of this state so they
can behave accordingly.

HBRT is responsible for telling PHYP when we get into this state.
There are two ways we can detect this state:
a) HBRT explicitly puts the PM complex into reset
b) PRD detects a specific FIR bit

The message should include this data:
- what state we are in (protected or unprotected)
- which processor is affected

Work for this story will include:
- Definition of the new message
- Creating a utility function to send the message
- Calling the utility function to send the 'unprotected' message
  inside all PM reset paths at runtime

Change-Id: Ib015d001d47883a247faedabedb0705ba0f1b215
RTC:201181
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/68870
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: TSUNG K. YEUNG <tyeung@us.ibm.com>
Reviewed-by: Roland Veloz <rveloz@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
mderkse1 authored and dcrowell77 committed Jan 24, 2019
1 parent 9de9d8f commit cb35695
Show file tree
Hide file tree
Showing 16 changed files with 764 additions and 230 deletions.
20 changes: 18 additions & 2 deletions src/include/runtime/interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2013,2018 */
/* Contributors Listed Below - COPYRIGHT 2013,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -98,7 +98,7 @@ enum MemoryError_t
#define HBRT_RC_NEXT_OPEN_RC ((int)(0x0u - 0x1009u)) /* 0xFFFF_EFF7 */

/** End return codes for scom_read, scom_write. */


/**
* I2C Master Description: chip, engine and port packed into
Expand Down Expand Up @@ -551,6 +551,14 @@ typedef struct hostInterfaces
HBRT_FW_MSG_HBRT_FSP_RESP = 6,
HBRT_FW_MSG_TYPE_I2C_LOCK = 7,
HBRT_FW_MSG_TYPE_SBE_STATE = 8,
HBRT_FW_MSG_TYPE_NVDIMM_PROTECTION = 9,
};

// NVDIMM protection state enum
// Values carried in nvdimm_protection_state.i_state when a message of
// type HBRT_FW_MSG_TYPE_NVDIMM_PROTECTION is sent.
enum
{
HBRT_FW_NVDIMM_NOT_PROTECTED = 0, // NVDIMM is not protected
HBRT_FW_NVDIMM_PROTECTED = 1 // NVDIMM is protected
};

struct hbrt_fw_msg // define struct hbrt_fw_msg
Expand Down Expand Up @@ -615,6 +623,14 @@ typedef struct hostInterfaces
uint64_t i_state; // state of the SBE; 0 = disabled, 1 = enabled
} __attribute__ ((packed)) sbe_state;

// This struct is sent from HBRT with
// io_type set to HBRT_FW_MSG_TYPE_NVDIMM_PROTECTION
// It reports the protection state for the NVDIMM(s) of one processor.
struct
{
uint64_t i_procId; // processor ID of the NVDIMM with/without OCC protection
uint64_t i_state; // NVDIMM protection state enum
// (HBRT_FW_NVDIMM_NOT_PROTECTED or HBRT_FW_NVDIMM_PROTECTED)
} __attribute__ ((packed)) nvdimm_protection_state;

// This struct is sent from HBRT with
// io_type set to HBRT_FW_MSG_HBRT_FSP_REQ or
// HBRT_FW_MSG_HBRT_FSP_RESP
Expand Down
70 changes: 58 additions & 12 deletions src/include/usr/isteps/nvdimm/nvdimm.H
Original file line number Diff line number Diff line change
Expand Up @@ -55,26 +55,29 @@ enum
void nvdimm_restore(TARGETING::TargetHandleList &i_nvdimmList);
#endif


/**
* @brief This function arms the trigger to enable backup in the event
* of power loss (DDR Reset_n goes low)
* @brief This function erases image on the nvdimm target
*
* @param[in] i_nvdimm - nvdimm target with NV controller
*
* @return errlHndl_t - Null if successful, otherwise a pointer to
* the error log.
*/
errlHndl_t nvdimmArmResetN(TARGETING::Target *i_nvdimm);
errlHndl_t nvdimmEraseNF(TARGETING::Target *i_nvdimm);

/**
* @brief This function erases image on the nvdimm target
* @brief Set the status flag
*
* @param[in] i_nvdimm - nvdimm target with NV controller
* @param[in] i_nvdimm - nvdimm target
*
* @param[in] i_status_flag - status flag to set for each nvdimm
*
* @return errlHndl_t - Null if successful, otherwise a pointer to
* the error log.
*/
errlHndl_t nvdimmEraseNF(TARGETING::Target *i_nvdimm);
void nvdimmSetStatusFlag(TARGETING::Target *i_nvdimm, const uint8_t i_status_flag);


#ifdef __HOSTBOOT_RUNTIME

/**
* @brief Check nvdimm error state
Expand All @@ -86,14 +89,57 @@ errlHndl_t nvdimmEraseNF(TARGETING::Target *i_nvdimm);
bool nvdimmInErrorState(TARGETING::Target *i_nvdimm);

/**
* @brief Set the status flag
* @brief This function arms the trigger to enable backup in the event
* of power loss (DDR Reset_n goes low)
*
* @param[in] i_nvdimm - nvdimm target
* @param[in] i_nvdimm - nvdimm target with NV controller
*
* @param[in] i_status_flag - status flag to set for each nvdimm
* @return errlHndl_t - Null if successful, otherwise a pointer to
* the error log.
*/
errlHndl_t nvdimmArmResetN(TARGETING::Target *i_nvdimm);

/**
* @brief Arms the trigger to enable backup in the event of a power loss
* on each NVDIMM
*
* The trigger (DDR_RESETN to the DIMM) is used to tell the NVDIMM
* that we have an EPOW event, so the NV controller can backup the
* data from the DRAM to flash. This will enable the NV controller
* to react when it sees the trigger toggles.
*
* @param[in] i_nvdimmTargetList : list of dimms that are NVDIMMs
* @return true if no errors logged, else false
*/
void nvdimmSetStatusFlag(TARGETING::Target *i_nvdimm, const uint8_t i_status_flag);
bool nvdimmArm(TARGETING::TargetHandleList &i_nvdimmTargetList);


/**
* @brief NVDIMM protection state
*
* NOT_PROTECTED - default state
* PROTECTED - switches to this when armed & OCC is in control
* UNPROTECTED_BECAUSE_ERROR - PRD detected error on NV controller
* Note: error will stay with target preventing PROTECTED status
* until power is cycled again
*
* NOTE(review): the firmware message enum (HBRT_FW_NVDIMM_NOT_PROTECTED /
* HBRT_FW_NVDIMM_PROTECTED) only defines two states; how
* UNPROTECTED_BECAUSE_ERROR is reported is not visible here - confirm
* against the implementation of notifyNvdimmProtectionChange().
*/
enum nvdimm_protection_t
{
NOT_PROTECTED = 0, // default state
PROTECTED = 1, // armed and OCC is in control
UNPROTECTED_BECAUSE_ERROR = 2 // PRD detected an error on the NV controller
};

/**
* @brief Notify PHYP of NVDIMM protection status
*
* @param[in] i_target Processor with NVDIMM
* @param[in] i_state Protection state of NVDIMM
*
* @return errlHndl_t - presumably Null if successful, otherwise a pointer
* to the error log, as for the other functions declared in
* this header (TODO confirm against the implementation).
*/
errlHndl_t notifyNvdimmProtectionChange(TARGETING::Target* i_target,
const nvdimm_protection_t i_state);
#endif

}

#endif // NVDIMM_EXT_H__
Expand Down
4 changes: 2 additions & 2 deletions src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ enum nvdimmModuleId
NVDIMM_CHECK_ERASE = 0x16,
NVDIMM_ARM_ERASE = 0x17,
NVDIMM_CHECK_READY = 0x18,

NOTIFY_NVDIMM_PROTECTION_CHG = 0x19,
};

/**
Expand Down Expand Up @@ -111,7 +111,7 @@ enum nvdimmReasonCode
NVDIMM_ERASE_FAILED = NVDIMM_COMP_ID | 0x19, // Failure to erase
NVDIMM_RESTORE_FAILED = NVDIMM_COMP_ID | 0x1A, // Failure to restore
NVDIMM_NOT_READY = NVDIMM_COMP_ID | 0x1B, // NVDIMM not ready for host to access

NVDIMM_NULL_FIRMWARE_REQUEST_PTR = NVDIMM_COMP_ID | 0x1C, // Firmware request is NULL
};

enum UserDetailsTypes
Expand Down
16 changes: 14 additions & 2 deletions src/include/usr/targeting/common/util.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2012,2018 */
/* Contributors Listed Below - COPYRIGHT 2012,2019 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -38,6 +38,8 @@ namespace TARGETING
{

class Target;
typedef Target* TargetHandle_t;
typedef std::vector<TargetHandle_t> TargetHandleList;

/**
* @brief Macro which indicates whether to translate addresses or not
Expand Down Expand Up @@ -172,6 +174,7 @@ bool orderByNodeAndPosition( Target* i_firstProc,
*/
uint8_t is_fused_mode( );


/**
* @brief Determine if the given dimm target is an NVDIMM
*
Expand All @@ -181,6 +184,15 @@ uint8_t is_fused_mode( );
*/
bool isNVDIMM( TARGETING::Target * i_target );

}
/**
* @brief Grab list of NVDIMMs under the processor
*
* @param[in] i_proc : processor under which to search for NVDIMMs
*
* @return List of DIMM targets that are NVDIMMs
*/
TARGETING::TargetHandleList getProcNVDIMMs( TARGETING::Target * i_proc );

} // TARGETING

#endif // __TARGETING_COMMON_UTIL_H
151 changes: 0 additions & 151 deletions src/usr/isteps/nvdimm/nvdimm.C
Original file line number Diff line number Diff line change
Expand Up @@ -205,26 +205,6 @@ void nvdimmSetStatusFlag(Target *i_nvdimm, const uint8_t i_status_flag)
,TARGETING::get_huid(i_nvdimm), i_status_flag);
}

/**
 * @brief Report whether the nvdimm is flagged as being in error
 *
 * Reads ATTR_NV_STATUS_FLAG from the target and tests the NSTD_ERR bits.
 *
 * @param[in] i_nvdimm - nvdimm target
 *
 * @return bool - true if any NSTD_ERR bit is set in the status flag,
 *                false otherwise
 */
bool nvdimmInErrorState(Target *i_nvdimm)
{
    TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmInErrorState() HUID[%X]",TARGETING::get_huid(i_nvdimm));

    const uint8_t l_flags = i_nvdimm->getAttr<TARGETING::ATTR_NV_STATUS_FLAG>();

    // In error as soon as any of the NSTD_ERR bits is present
    const bool l_inError = ((l_flags & NSTD_ERR) != 0);

    TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmInErrorState() HUID[%X]",TARGETING::get_huid(i_nvdimm));
    return l_inError;
}

/**
* @brief Check NV controller ready state
Expand Down Expand Up @@ -447,31 +427,6 @@ errlHndl_t nvdimmPollEraseDone(Target* i_nvdimm,
return l_err;
}

/**
 * @brief Poll the command status register until the arm operation has
 *        completed (completion only - success or failure is not reported)
 *
 * Thin wrapper around nvdimmPollStatus() for the ARM operation.
 *
 * @param[in] i_nvdimm - nvdimm target with NV controller
 *
 * @param[out] o_poll - total polled time in ms
 *
 * @return errlHndl_t - Null if successful, otherwise a pointer to
 *      the error log.
 */
errlHndl_t nvdimmPollArmDone(Target* i_nvdimm,
                             uint32_t &o_poll)
{
    TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmPollArmDone() nvdimm[%X]", TARGETING::get_huid(i_nvdimm) );

    // Delegate the actual polling; only the operation type is fixed here
    errlHndl_t l_pollErr = nvdimmPollStatus(i_nvdimm, ARM, o_poll);

    TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmPollArmDone() nvdimm[%X]",
              TARGETING::get_huid(i_nvdimm));

    return l_pollErr;
}

/**
* @brief This function polls the command status register for backup power
Expand Down Expand Up @@ -611,113 +566,7 @@ errlHndl_t nvdimmSetESPolicy(Target* i_nvdimm)
}


/**
 * @brief This function checks the arm status register to make sure
 *        the trigger has been armed to ddr_reset_n
 *
 * Reads ARM_STATUS from the NV controller. A failing register read is
 * returned to the caller unchanged. If the read works but not every
 * ARM_SUCCESS bit is set, a new predictive NVDIMM_ARM_FAILED error log
 * is created and returned.
 *
 * @param[in] i_nvdimm - nvdimm target with NV controller
 *
 * @return errlHndl_t - Null if successful, otherwise a pointer to
 *      the error log.
 */
errlHndl_t nvdimmCheckArmSuccess(Target *i_nvdimm)
{
    TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmCheckArmSuccess() nvdimm[%X]",
              TARGETING::get_huid(i_nvdimm));

    errlHndl_t l_err = nullptr;
    uint8_t l_data = 0;

    l_err = nvdimmReadReg(i_nvdimm, ARM_STATUS, l_data);

    if (l_err)
    {
        // Adjacent literals are concatenated, so the separating space
        // has to be part of one of them.
        TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckArmSuccess() nvdimm[%X] "
                  "failed to read arm status reg!",TARGETING::get_huid(i_nvdimm));
    }
    else if ((l_data & ARM_SUCCESS) != ARM_SUCCESS)
    {

        TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmCheckArmSuccess() nvdimm[%X] "
                  "failed to arm!",TARGETING::get_huid(i_nvdimm));
        /*@
         *@errortype
         *@reasoncode       NVDIMM_ARM_FAILED
         *@severity         ERRORLOG_SEV_PREDICTIVE
         *@moduleid         NVDIMM_SET_ARM
         *@userdata1[0:31]  Related ops (0xff = NA)
         *@userdata1[32:63] Target Huid
         *@userdata2        <UNUSED>
         *@devdesc          Encountered error arming the catastrophic save
         *                  trigger on NVDIMM. Make sure an energy source
         *                  is connected to the NVDIMM and the ES policy
         *                  is set properly
         *@custdesc         NVDIMM encountered error arming save trigger
         */
        l_err = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_PREDICTIVE,
                                       NVDIMM_SET_ARM,
                                       NVDIMM_ARM_FAILED,
                                       NVDIMM_SET_USER_DATA_1(ARM, TARGETING::get_huid(i_nvdimm)),
                                       0x0,
                                       ERRORLOG::ErrlEntry::NO_SW_CALLOUT );

        l_err->collectTrace(NVDIMM_COMP_NAME, 256 );
        //@TODO RTC 199645 - add HW callout on dimm target
        //failure to arm could mean internal NV controller error or
        //even error on the battery pack. NVDIMM will lose persistency
        //if failed to arm trigger
    }

    // Note: the value traced as ret[] is the raw ARM_STATUS register data
    TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmCheckArmSuccess() nvdimm[%X] ret[%X]",
              TARGETING::get_huid(i_nvdimm), l_data);

    return l_err;
}

/**
 * @brief Arm the backup trigger (DDR Reset_n going low on power loss)
 *        together with ATOMIC_SAVE_AND_ERASE. No separate erase command
 *        is needed: the existing image is erased immediately before the
 *        backup taken on the next catastrophic event.
 *
 * Sequence: write the arm command, poll for completion, then verify the
 * arm status. The first failing step ends the sequence.
 *
 * @param[in] i_nvdimm - nvdimm target with NV controller
 *
 * @return errlHndl_t - Null if successful, otherwise a pointer to
 *      the error log.
 */
errlHndl_t nvdimmArmResetN(Target *i_nvdimm)
{
    TRACUCOMP(g_trac_nvdimm, ENTER_MRK"nvdimmArmResetN() nvdimm[%X]",
              TARGETING::get_huid(i_nvdimm));

    // ARM_RESETN is combined with ATOMIC_SAVE_AND_ERASE so that the
    // persistent data stays in place until the next catastrophic event
    // has actually occurred.
    errlHndl_t l_armErr = nvdimmWriteReg(i_nvdimm, ARM_CMD,
                                         ARM_RESETN_AND_ATOMIC_SAVE_AND_ERASE);

    if (!l_armErr)
    {
        // Modules are armed one at a time, so the poll counter starts
        // from zero with no offset.
        uint32_t l_polledMs = 0;
        l_armErr = nvdimmPollArmDone(i_nvdimm, l_polledMs);

        if (!l_armErr)
        {
            l_armErr = nvdimmCheckArmSuccess(i_nvdimm);
        }
    }
    else
    {
        TRACFCOMP(g_trac_nvdimm, ERR_MRK"nvdimmArmResetN() nvdimm[%X] error arming nvdimm!!",
                  TARGETING::get_huid(i_nvdimm));
    }

    TRACUCOMP(g_trac_nvdimm, EXIT_MRK"nvdimmArmResetN() nvdimm[%X]",
              TARGETING::get_huid(i_nvdimm));
    return l_armErr;
}

/**
* @brief This function checks for valid image on the given target
Expand Down

0 comments on commit cb35695

Please sign in to comment.