Skip to content

Commit 978b2c7

Browse files
cjcain authored and
A. Patrick Williams III committed
Prevent OCC exception from logging duplicate errors
Change-Id: I3262c42c906fc9cfa879e7df0e501e2f7fe1e2f3
CQ:SW320752
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/20578
Tested-by: Jenkins Server
Tested-by: Jenkins OP Build CI
Tested-by: Jenkins OP HW
Tested-by: FSP CI Jenkins
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: SHELDON R. BAILEY <baileysh@us.ibm.com>
Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com>
1 parent ef63b1a commit 978b2c7

File tree

7 files changed

+252
-106
lines changed

7 files changed

+252
-106
lines changed

src/include/usr/htmgt/htmgt_reasoncodes.H

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ namespace HTMGT
6363
HTMGT_RC_OT_THROTTLE_INVALID_N = HTMGT_COMP_ID | 0x04,
6464
HTMGT_RC_OCC_NOT_READY = HTMGT_COMP_ID | 0x05,
6565
HTMGT_RC_ATTRIBUTE_ERROR = HTMGT_COMP_ID | 0x06,
66+
HTMGT_RC_OCC_EXCEPTION = HTMGT_COMP_ID | 0x0E,
6667
HTMGT_RC_NO_SUPPORT = HTMGT_COMP_ID | 0x0F,
6768
HTMGT_RC_OCC_RESET = HTMGT_COMP_ID | 0x15,
6869
HTMGT_RC_OCC_CRIT_FAILURE = HTMGT_COMP_ID | 0x16,

src/usr/htmgt/htmgt.C

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -102,8 +102,18 @@ namespace HTMGT
102102
l_err = OccManager::sendOccPoll();
103103
if (l_err)
104104
{
105-
// Continue even if failed (poll will be retried)
106-
ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
105+
if (OccManager::occNeedsReset())
106+
{
107+
// No need to continue if a reset is required
108+
TMGT_ERR("sendOccConfigData(): OCCs need to "
109+
"be reset");
110+
break;
111+
}
112+
else
113+
{
114+
// Continue even if failed (will be retried)
115+
ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
116+
}
107117
}
108118

109119
// Send ALL config data
@@ -415,17 +425,22 @@ namespace HTMGT
415425
{
416426
// Create an elog so the user knows the cmd failed.
417427
TMGT_ERR("enableOccActuation(): System is in safe mode");
428+
uint32_t safeInstance = 0;
429+
uint32_t safeRc = OccManager::getSafeModeReason(safeInstance);
418430
/*@
419431
* @errortype
420432
* @reasoncode HTMGT_RC_OCC_CRIT_FAILURE
421433
* @moduleid HTMGT_MOD_ENABLE_OCC_ACTUATION
422-
* @userdata1 OCC activate [1==true][0==false]
434+
* @userdata1[0:31] OCC activate [1==true][0==false]
435+
* @userdata1[32:63] return code triggering safe mode
436+
* @userdata2[0:31] safeMode flag
437+
* @userdata2[32:63] OCC instance
423438
* @devdesc Operation not allowed, system is in safe mode
424439
*/
425440
bldErrLog(l_err,
426441
HTMGT_MOD_ENABLE_OCC_ACTUATION,
427442
HTMGT_RC_OCC_CRIT_FAILURE,
428-
0, i_occActivation, 0, safeMode,
443+
i_occActivation, safeRc, safeMode, safeInstance,
429444
ERRORLOG::ERRL_SEV_UNRECOVERABLE);
430445
}
431446

src/usr/htmgt/htmgt_cfgdata.C

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -196,23 +196,23 @@ namespace HTMGT
196196
break;
197197

198198
default:
199-
TMGT_ERR("send_occ_config_data: Unsupported"
199+
TMGT_ERR("sendOccConfigData: Unsupported"
200200
" format type 0x%02X",
201201
format);
202202
cmdDataLen = 0;
203203
}
204204

205205
if (cmdDataLen > 0)
206206
{
207-
TMGT_INF("send_occ_config_data: Sending config"
207+
TMGT_INF("sendOccConfigData: Sending config"
208208
" 0x%02X to OCC%d",
209209
format, occInstance);
210210
OccCmd cmd(occ, OCC_CMD_SETUP_CFG_DATA,
211211
cmdDataLen, cmdData);
212212
errlHndl_t l_err = cmd.sendOccCmd();
213213
if (l_err != NULL)
214214
{
215-
TMGT_ERR("send_occ_config_data: OCC%d cfg "
215+
TMGT_ERR("sendOccConfigData: OCC%d cfg "
216216
"format 0x%02X failed with rc=0x%04X",
217217
occInstance, format,
218218
l_err->reasonCode());
@@ -222,7 +222,7 @@ namespace HTMGT
222222
{
223223
if (OCC_RC_SUCCESS != cmd.getRspStatus())
224224
{
225-
TMGT_ERR("send_occ_config_data: OCC%d cfg "
225+
TMGT_ERR("sendOccConfigData: OCC%d cfg "
226226
"format 0x%02X had bad rsp status"
227227
" 0x%02X for sysConfig",
228228
occInstance, format,
@@ -239,12 +239,17 @@ namespace HTMGT
239239
}
240240
} // if (sendData)
241241

242+
if (OccManager::occNeedsReset())
243+
{
244+
TMGT_ERR("sendOccConfigData(): OCCs need to be reset");
245+
}
246+
242247
} // for each config format
243248

244249
} // for each OCC
245250
}
246251

247-
} // end send_occ_config_data()
252+
} // end sendOccConfigData()
248253

249254

250255
/** OCC configuration data message versions */

src/usr/htmgt/htmgt_occ.C

Lines changed: 60 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,8 @@ namespace HTMGT
6363
iv_target(i_target),
6464
iv_lastPollValid(false),
6565
iv_occsPresent(1 << i_instance),
66+
iv_resetReason(OCC_RESET_REASON_NONE),
67+
iv_exceptionLogged(0),
6668
iv_resetCount(0),
6769
iv_version(0x01)
6870
{
@@ -235,6 +237,7 @@ namespace HTMGT
235237
iv_failed = false;
236238
iv_lastPollValid = false;
237239
iv_resetReason = OCC_RESET_REASON_NONE;
240+
iv_exceptionLogged = 0;
238241
}
239242

240243

@@ -622,6 +625,7 @@ namespace HTMGT
622625
{
623626
TMGT_INF("_setOccState: All OCCs have reached state "
624627
"0x%02X", requestedState);
628+
iv_state = requestedState;
625629

626630
if (OCC_STATE_ACTIVE == requestedState)
627631
{
@@ -769,13 +773,15 @@ namespace HTMGT
769773
* @errortype
770774
* @moduleid HTMGT_MOD_OCC_RESET
771775
* @reasoncode HTMGT_RC_OCC_CRIT_FAILURE
776+
* @userdata1 return code triggering safe mode
777+
* @userdata2 OCC instance
772778
* @devdesc OCC reset threshold reached.
773779
* Leaving OCCs in reset state
774780
*/
775781
bldErrLog(err,
776782
HTMGT_MOD_OCC_RESET,
777783
HTMGT_RC_OCC_CRIT_FAILURE,
778-
0, 0, 0, 0,
784+
0, cv_safeReturnCode, 0, cv_safeOccInstance,
779785
ERRORLOG::ERRL_SEV_UNRECOVERABLE);
780786
}
781787

@@ -910,6 +916,13 @@ namespace HTMGT
910916
}
911917

912918

919+
uint32_t OccManager::_getSafeModeReason(uint32_t & o_instance)
920+
{
921+
o_instance = cv_safeOccInstance;
922+
return cv_safeReturnCode;
923+
}
924+
925+
913926
bool OccManager::_occNeedsReset()
914927
{
915928
bool needsReset = false;
@@ -1039,6 +1052,40 @@ namespace HTMGT
10391052
return err;
10401053
}
10411054

1055+
// Consolidate all OCC states
1056+
void OccManager::_syncOccStates()
1057+
{
1058+
occStateId currentState = OCC_STATE_NO_CHANGE;
1059+
1060+
for(occList_t::const_iterator occ_itr = iv_occArray.begin();
1061+
(occ_itr != iv_occArray.end());
1062+
++occ_itr)
1063+
{
1064+
Occ * occ = *occ_itr;
1065+
if (OCC_STATE_NO_CHANGE == currentState)
1066+
{
1067+
currentState = occ->getState();
1068+
}
1069+
else
1070+
{
1071+
if (currentState != occ->getState())
1072+
{
1073+
// States do not match yet...
1074+
currentState = OCC_STATE_NO_CHANGE;
1075+
break;
1076+
}
1077+
}
1078+
}
1079+
if (OCC_STATE_NO_CHANGE != currentState)
1080+
{
1081+
if (iv_state != currentState)
1082+
{
1083+
TMGT_INF("syncOccStates: All OCCs are in 0x%02X", currentState);
1084+
iv_state = currentState;
1085+
}
1086+
}
1087+
}
1088+
10421089

10431090
uint8_t OccManager::getNumOccs()
10441091
{
@@ -1084,16 +1131,22 @@ namespace HTMGT
10841131

10851132
void OccManager::waitForOccCheckpoint()
10861133
{
1087-
return Singleton<OccManager>::instance()._waitForOccCheckpoint();
1134+
Singleton<OccManager>::instance()._waitForOccCheckpoint();
10881135
}
10891136

10901137
void OccManager::updateSafeModeReason(uint32_t i_src,
10911138
uint32_t i_instance)
10921139
{
1093-
return Singleton<OccManager>::instance().
1140+
Singleton<OccManager>::instance().
10941141
_updateSafeModeReason(i_src, i_instance);
10951142
}
10961143

1144+
uint32_t OccManager::getSafeModeReason(uint32_t & o_instance)
1145+
{
1146+
return Singleton<OccManager>::instance().
1147+
_getSafeModeReason(o_instance);
1148+
}
1149+
10971150
bool OccManager::occNeedsReset()
10981151
{
10991152
return Singleton<OccManager>::instance()._occNeedsReset();
@@ -1124,6 +1177,10 @@ namespace HTMGT
11241177
Singleton<OccManager>::instance()._setPstateTable(i_useNormal);
11251178
}
11261179

1180+
void OccManager::syncOccStates()
1181+
{
1182+
Singleton<OccManager>::instance()._syncOccStates();
1183+
}
11271184

11281185
} // end namespace
11291186

src/usr/htmgt/htmgt_occ.H

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -343,6 +343,8 @@ namespace HTMGT
343343

344344
occResetReason iv_resetReason;
345345

346+
// Value of last exception committed (to prevent duplicates)
347+
uint8_t iv_exceptionLogged;
346348

347349
/**
348350
* @brief Clear flags after OCC has been reset
@@ -490,6 +492,16 @@ namespace HTMGT
490492
uint32_t i_instance);
491493

492494

495+
/**
496+
* @brief Return the reason the system entered safe mode
497+
*
498+
* @param[out] o_instance OCC instance
499+
*
500+
* @return SRC which triggered safe mode
501+
*/
502+
static uint32_t getSafeModeReason(uint32_t & o_instance);
503+
504+
493505
/**
494506
* @brief Check if any OCCs need to be reset
495507
*
@@ -555,6 +567,13 @@ namespace HTMGT
555567
static bool occFailed();
556568

557569

570+
/**
571+
* @brief Update OCC manager state with consolidated OCC state
572+
*
573+
*/
574+
static void syncOccStates();
575+
576+
558577
private:
559578

560579
typedef std::vector<Occ*> occList_t;
@@ -641,6 +660,9 @@ namespace HTMGT
641660
void _updateSafeModeReason(uint32_t i_src,
642661
uint32_t i_instance);
643662

663+
/** See getSafeModeReason() above */
664+
uint32_t _getSafeModeReason(uint32_t & o_instance);
665+
644666
/** See occNeedsReset() above */
645667
bool _occNeedsReset();
646668

@@ -665,6 +687,8 @@ namespace HTMGT
665687
iv_normalPstateTables = i_useNormal;
666688
};
667689

690+
/** See syncOccStates() above */
691+
void _syncOccStates();
668692
};
669693

670694
typedef Singleton<OccManager> occMgr;

0 commit comments

Comments
 (0)