Skip to content

Commit

Permalink
Fixed several Arm error handling bugs
Browse files Browse the repository at this point in the history
CQ:SW475445
Change-Id: I19cd03850b65a23d2a35c9160352cdc7a4946667
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/83534
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: TSUNG K YEUNG <tyeung@us.ibm.com>
Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
  • Loading branch information
Matthickman14 authored and dcrowell77 committed Sep 25, 2019
1 parent d507b6f commit 913c6fd
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 15 deletions.
4 changes: 4 additions & 0 deletions src/usr/isteps/nvdimm/errlud_nvdimm.C
Expand Up @@ -185,6 +185,7 @@ UdNvdimmOPParms::UdNvdimmOPParms( const nvdimm_reg_t &i_RegInfo )
// 1 byte : NVDIMM_CMD_STATUS0
// 1 byte : ABORT_CMD_TIMEOUT
// 1 byte : ERASE_STATUS
// 1 byte : ERASE_FAIL_INFO
// 1 byte : ERASE_TIMEOUT0
// 1 byte : ERASE_TIMEOUT1
// 1 byte : SET_ES_POLICY_STATUS
Expand All @@ -193,6 +194,9 @@ UdNvdimmOPParms::UdNvdimmOPParms( const nvdimm_reg_t &i_RegInfo )
// 1 byte : RESTORE_TIMEOUT0
// 1 byte : RESTORE_TIMEOUT1
// 1 byte : ARM_STATUS
// 1 byte : ARM_FAIL_INFO
// 1 byte : ARM_TIMEOUT0
// 1 byte : ARM_TIMEOUT1
// 1 byte : SET_EVENT_NOTIFICATION_STATUS
// 1 byte : ENCRYPTION_CONFIG_STATUS

Expand Down
2 changes: 2 additions & 0 deletions src/usr/isteps/nvdimm/nvdimm.H
Expand Up @@ -122,6 +122,8 @@ enum i2cReg : uint16_t
SET_ES_POLICY_STATUS = 0x070,
FIRMWARE_OPS_STATUS = 0x071,
OPERATIONAL_UNIT_OPS_STATUS = 0x072,
ERASE_FAIL_INFO = 0x073,
ARM_FAIL_INFO = 0x076,
CSAVE_INFO = 0x080,
CSAVE_FAIL_INFO0 = 0x084,
CSAVE_FAIL_INFO1 = 0x085,
Expand Down
35 changes: 21 additions & 14 deletions src/usr/isteps/nvdimm/nvdimmErrorLog.C
Expand Up @@ -158,6 +158,14 @@ void nvdimmTraceRegs(Target *i_nvdimm, nvdimm_reg_t& o_RegInfo)
}
o_RegInfo.Erase_Status = l_data;

// Read ERASE FAIL INFO register
l_err = nvdimmReadReg(i_nvdimm, ERASE_FAIL_INFO, l_data);
if(l_err)
{
errlCommit( l_err, NVDIMM_COMP_ID );
}
o_RegInfo.Erase_Fail_Info = l_data;

// Read ERASE TIMEOUT0 register
l_err = nvdimmReadReg(i_nvdimm, ERASE_TIMEOUT0, l_data);
if(l_err)
Expand Down Expand Up @@ -230,6 +238,14 @@ void nvdimmTraceRegs(Target *i_nvdimm, nvdimm_reg_t& o_RegInfo)
}
o_RegInfo.Arm_Status = l_data;

// Read ARM FAIL INFO register
l_err = nvdimmReadReg(i_nvdimm, ARM_FAIL_INFO, l_data);
if(l_err)
{
errlCommit( l_err, NVDIMM_COMP_ID );
}
o_RegInfo.Arm_Fail_Info = l_data;

// Read ARM TIMEOUT0 register
l_err = nvdimmReadReg(i_nvdimm, ARM_TIMEOUT0, l_data);
if(l_err)
Expand Down Expand Up @@ -506,26 +522,17 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
o_err->addPartCallout( i_nvdimm,
HWAS::BPM_CABLE_PART_TYPE,
HWAS::SRCI_PRIORITY_HIGH);
// Callout dimm but do not deconfig or gard
o_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL);

// Check restore status and set dimm status accordingly
if(l_continue)
{
// Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
nvdimmSetStatusFlag(i_nvdimm, NSTD_ERR_VAL_SR);

// Callout dimm but do not deconfig or gard
o_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL);
}
else
{
// Callout dimm, deconfig and gard
o_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_HIGH,
HWAS::DECONFIG,
HWAS::GARD_Fatal);
}

break;
Expand Down
4 changes: 4 additions & 0 deletions src/usr/isteps/nvdimm/nvdimmdd.H
Expand Up @@ -110,6 +110,7 @@ struct nvdimm_reg_t
uint8_t NVDimm_Ready;
uint8_t NVDimm_CMD_Status0;
uint8_t Erase_Status;
uint8_t Erase_Fail_Info;
uint8_t Erase_Timeout0;
uint8_t Erase_Timeout1;
uint8_t Abort_CMD_Timeout;
Expand All @@ -119,6 +120,7 @@ struct nvdimm_reg_t
uint8_t Restore_Timeout0;
uint8_t Restore_Timeout1;
uint8_t Arm_Status;
uint8_t Arm_Fail_Info;
uint8_t Arm_Timeout0;
uint8_t Arm_Timeout1;
uint8_t Set_Event_Notification_Status;
Expand All @@ -141,6 +143,7 @@ struct nvdimm_reg_t
NVDimm_Ready(0),
NVDimm_CMD_Status0(0),
Erase_Status(0),
Erase_Fail_Info(0),
Erase_Timeout0(0),
Erase_Timeout1(0),
Abort_CMD_Timeout(0),
Expand All @@ -150,6 +153,7 @@ struct nvdimm_reg_t
Restore_Timeout0(0),
Restore_Timeout1(0),
Arm_Status(0),
Arm_Fail_Info(0),
Arm_Timeout0(0),
Arm_Timeout1(0),
Set_Event_Notification_Status(0),
Expand Down
6 changes: 6 additions & 0 deletions src/usr/isteps/nvdimm/plugins/errludP_nvdimm.H
Expand Up @@ -212,6 +212,7 @@ public:
// 1 byte : NVDIMM_READY
// 1 byte : NVDIMM_CMD_STATUS0
// 1 byte : ERASE_STATUS
// 1 byte : ERASE_FAIL_INFO
// 1 byte : ERASE_TIMEOUT0
// 1 byte : ERASE_TIMEOUT1
// 1 byte : ABORT_CMD_TIMEOUT
Expand All @@ -221,6 +222,7 @@ public:
// 1 byte : RESTORE_TIMEOUT0
// 1 byte : RESTORE_TIMEOUT1
// 1 byte : ARM_STATUS
// 1 byte : ARM_FAIL_INFO
// 1 byte : ARM_TIMEOUT0
// 1 byte : ARM_TIMEOUT1
// 1 byte : SET_EVENT_NOTIFICATION_STATUS
Expand Down Expand Up @@ -253,6 +255,8 @@ public:
++l_databuf;
i_parser.PrintNumber("Erase Status Register: ","%.2lX",TO_UINT8(l_databuf));
++l_databuf;
i_parser.PrintNumber("Erase Fail Info Register: ","%.2lX",TO_UINT8(l_databuf));
++l_databuf;
i_parser.PrintNumber("Erase Timeout0 Register: ","%.2lX",TO_UINT8(l_databuf));
++l_databuf;
i_parser.PrintNumber("Erase Timeout1 Register: ","%.2lX",TO_UINT8(l_databuf));
Expand All @@ -271,6 +275,8 @@ public:
++l_databuf;
i_parser.PrintNumber("Arm Status Register: ","%.2lX",TO_UINT8(l_databuf));
++l_databuf;
i_parser.PrintNumber("Arm Fail Info Register: ","%.2lX",TO_UINT8(l_databuf));
++l_databuf;
i_parser.PrintNumber("Arm Timeout0 Register: ","%.2lX",TO_UINT8(l_databuf));
++l_databuf;
i_parser.PrintNumber("Arm Timeout1 Register: ","%.2lX",TO_UINT8(l_databuf));
Expand Down
7 changes: 6 additions & 1 deletion src/usr/isteps/nvdimm/runtime/nvdimm_rt.C
Expand Up @@ -304,7 +304,12 @@ bool nvdimmArm(TargetHandleList &i_nvdimmTargetList)
if (l_err)
{
TRACFCOMP(g_trac_nvdimm, "nvdimmArm() nvdimm[%X] failed first health status check", get_huid(l_nvdimm));
if (!l_continue)

// The arm timeout variable is used here as the continue variable for the
// health status check. This was done so that the timeout is available for use
// in the check. If true, either the arm timed out with a health status failure,
// or the health status check failed with another disarm-and-exit condition.
if (l_arm_timeout)
{
errlCommit( l_err, NVDIMM_COMP_ID );

Expand Down

0 comments on commit 913c6fd

Please sign in to comment.