Skip to content

Commit

Permalink
Fixed error handling issues in arm path
Browse files Browse the repository at this point in the history
Addresses the following:
1. nvdimms not getting disarmed in error cases
2. nvdimms getting garded on BPM-specific errors

Change-Id: Ibf9b391cb94c1dd247406f960298a1e55ebf186f
CQ:SW475860
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/84061
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
  • Loading branch information
Matthickman14 authored and dcrowell77 committed Sep 27, 2019
1 parent 5db287b commit 7b87226
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 121 deletions.
2 changes: 2 additions & 0 deletions src/include/usr/isteps/nvdimm/nvdimmreasoncodes.H
Expand Up @@ -107,6 +107,7 @@ enum nvdimmModuleId
NVDIMM_WAIT_OPER_OPS_COMPLETE = 0x39,
NVDIMM_COMPARE_CKSUM = 0x3A,
NVDIMM_CHECK_FW_SLOT = 0x3B,
NVDIMM_ARM_PRE_CHECK = 0x3C,
};

/**
Expand Down Expand Up @@ -199,6 +200,7 @@ enum nvdimmReasonCode
NVDIMM_VENDOR_LOG_CKSUM_FAILED = NVDIMM_COMP_ID | 0x4F, // Vendor log for FFDC checksum fail
NVDIMM_INVALID_FW_SLOT = NVDIMM_COMP_ID | 0x50,
NVDIMM_ERASE_ERROR = NVDIMM_COMP_ID | 0x51,
NVDIMM_ARM_PRE_CHECK_FAILED = NVDIMM_COMP_ID | 0x52,
};

enum UserDetailsTypes
Expand Down
6 changes: 0 additions & 6 deletions src/usr/isteps/nvdimm/nvdimm.C
Expand Up @@ -71,12 +71,6 @@ TRAC_INIT(&g_trac_nvdimm, NVDIMM_COMP_NAME, 2*KILOBYTE);
namespace NVDIMM
{
#define NUM_OFFSET 2
// Packs two 32-bit values into a single 64-bit word by forwarding to
// TWO_UINT32_TO_UINT64. The first ("left") argument is the operation id and
// the second ("right") argument is the target HUID.
// NOTE(review): presumably "left" lands in the most-significant half --
// confirm against the definition of TWO_UINT32_TO_UINT64.
#define NVDIMM_SET_USER_DATA_1(ops_id_left32, huid_right32) \
    TWO_UINT32_TO_UINT64(ops_id_left32, huid_right32)

// Timeout flavour of the packer above: the "left" word is the polled value
// and the "right" word is the timeout value. Same packing, different meaning.
#define NVDIMM_SET_USER_DATA_2_TIMEOUT(polled_left32, timeout_right32) \
    NVDIMM_SET_USER_DATA_1(polled_left32, timeout_right32)


typedef struct ops_timeoutInfo{
const char * desc;
Expand Down
8 changes: 7 additions & 1 deletion src/usr/isteps/nvdimm/nvdimm.H
Expand Up @@ -41,6 +41,12 @@ extern trace_desc_t* g_trac_nvdimm;

namespace NVDIMM
{
// Packs two 32-bit values into a single 64-bit word by forwarding to
// TWO_UINT32_TO_UINT64. The first ("left") argument is the operation id and
// the second ("right") argument is the target HUID.
// NOTE(review): presumably "left" lands in the most-significant half --
// confirm against the definition of TWO_UINT32_TO_UINT64.
#define NVDIMM_SET_USER_DATA_1(ops_id_left32, huid_right32) \
    TWO_UINT32_TO_UINT64(ops_id_left32, huid_right32)

// Timeout flavour of the packer above: the "left" word is the polled value
// and the "right" word is the timeout value. Same packing, different meaning.
#define NVDIMM_SET_USER_DATA_2_TIMEOUT(polled_left32, timeout_right32) \
    NVDIMM_SET_USER_DATA_1(polled_left32, timeout_right32)


// I2C registers for page 0-3, extracted from JEDEC BAEBI spec
// Refer to BAEBI spec for details
Expand Down Expand Up @@ -369,7 +375,7 @@ enum i2c_out_values : uint8_t
RSTR_ERROR = 0x02,
SAVE_ERROR = 0x02,
ERASE_ERROR = 0x02,
ARM_CLEAR_ALL = 0x3C,
CLEAR_ALL_STATUS = 0x3C, //Clears CSAVE, RESTORE, ERASE, and ARM status regs
};

// Timeout-related enum
Expand Down
40 changes: 13 additions & 27 deletions src/usr/isteps/nvdimm/nvdimmErrorLog.C
Expand Up @@ -384,11 +384,11 @@ bool nvdimmCalloutDimm(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
}
else if (((l_data & ARM_SUCCESS) != ARM_SUCCESS) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED))
{
l_continue = true;
l_continue = false;
}

// Check arm status and set dimm status accordingly
if(!l_continue)
if(l_continue)
{
// Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR);
Expand Down Expand Up @@ -549,7 +549,7 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
}
else if (((l_data & ARM_SUCCESS) != ARM_SUCCESS) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED))
{
l_continue = true;
l_continue = false;
}

// Callout BPM and Cable but cannot deconfig or gard
Expand All @@ -561,31 +561,17 @@ bool nvdimmBPMCableCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::SRCI_PRIORITY_HIGH);

// Check arm status and set dimm status accordingly
if(!l_continue)
if(l_continue)
{
// Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR);

// Callout dimm but do not deconfig or gard
o_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL);
}
else
{
// Set ATTR_NV_STATUS_FLAG to dimm disarmed
l_err = notifyNvdimmProtectionChange(i_nvdimm, NVDIMM_DISARMED);
if (l_err)
{
errlCommit( l_err, NVDIMM_COMP_ID );
}
// Callout dimm, deconfig and gard
o_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_HIGH,
HWAS::DECONFIG,
HWAS::GARD_Fatal);
}

// Callout dimm but do not deconfig or gard
o_err->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL);
break;
}

Expand Down Expand Up @@ -687,7 +673,7 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
}
else if (((l_data & ARM_SUCCESS) != ARM_SUCCESS) || ((l_data & RESET_N_ARMED) != RESET_N_ARMED))
{
l_continue = true;
l_continue = false;
}

// Callout BPM on high
Expand All @@ -702,7 +688,7 @@ bool nvdimmBPMCallout(Target *i_nvdimm, uint8_t i_step, errlHndl_t& o_err)
HWAS::GARD_NULL);

// Check arm status and set dimm status accordingly
if(!l_continue)
if(l_continue)
{
// Set ATTR_NV_STATUS_FLAG to partially working as data may still persist
notifyNvdimmProtectionChange(i_nvdimm,NVDIMM_RISKY_HW_ERROR);
Expand Down Expand Up @@ -1303,7 +1289,7 @@ errlHndl_t nvdimmHealthStatusCheck(Target *i_nvdimm, uint8_t i_step, bool& o_con
TARGETING::get_huid(i_nvdimm),
0x0,
ERRORLOG::ErrlEntry::NO_SW_CALLOUT );
o_continue = true;
o_continue = false;
// Callout dimm but no deconfig and gard
l_err_t->addHwCallout( i_nvdimm,
HWAS::SRCI_PRIORITY_LOW,
Expand Down

0 comments on commit 7b87226

Please sign in to comment.