5
5
/* */
6
6
/* OpenPOWER HostBoot Project */
7
7
/* */
8
- /* Contributors Listed Below - COPYRIGHT 2016,2019 */
8
+ /* Contributors Listed Below - COPYRIGHT 2016,2020 */
9
9
/* [+] International Business Machines Corp. */
10
10
/* */
11
11
/* */
@@ -442,7 +442,7 @@ uint32_t __analyzeHealthStatus0Reg(STEP_CODE_DATA_STRUCT & io_sc,
442
442
PRDF_ERR ( PRDF_FUNC "Failed to read Health Status0 Register. "
443
443
"HUID: 0x%08x" , getHuid (i_dimm ) );
444
444
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
445
- o_rc = FAIL ;
445
+ o_rc = PRD_SCANCOM_FAILURE ;
446
446
break ;
447
447
}
448
448
std ::map < uint8_t ,bool > bitList = __nvdimmGetActiveBits ( data );
@@ -551,7 +551,7 @@ uint32_t __analyzeHealthStatus1Reg( STEP_CODE_DATA_STRUCT & io_sc,
551
551
PRDF_ERR ( PRDF_FUNC "Failed to read Health Status1 Register. "
552
552
"HUID: 0x%08x" , getHuid (i_dimm ) );
553
553
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
554
- o_rc = FAIL ;
554
+ o_rc = PRD_SCANCOM_FAILURE ;
555
555
break ;
556
556
}
557
557
std ::map < uint8_t ,bool > bitList = __nvdimmGetActiveBits ( data );
@@ -710,7 +710,7 @@ uint32_t __readTemp( TargetHandle_t i_dimm, uint16_t i_tempMsbReg,
710
710
PRDF_ERR ( PRDF_FUNC "Failed to read ES Temperature MSB Register. "
711
711
"HUID: 0x%08x" , getHuid (i_dimm ) );
712
712
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
713
- o_rc = FAIL ;
713
+ o_rc = PRD_SCANCOM_FAILURE ;
714
714
break ;
715
715
}
716
716
@@ -721,7 +721,7 @@ uint32_t __readTemp( TargetHandle_t i_dimm, uint16_t i_tempMsbReg,
721
721
PRDF_ERR ( PRDF_FUNC "Failed to read ES Temperature LSB Register. "
722
722
"HUID: 0x%08x" , getHuid (i_dimm ) );
723
723
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
724
- o_rc = FAIL ;
724
+ o_rc = PRD_SCANCOM_FAILURE ;
725
725
break ;
726
726
}
727
727
@@ -768,7 +768,7 @@ uint32_t __analyzeErrorThrStatusReg( STEP_CODE_DATA_STRUCT & io_sc,
768
768
PRDF_ERR ( PRDF_FUNC "Failed to read Error Threshold Status Reg. "
769
769
"HUID: 0x%08x" , getHuid (i_dimm ) );
770
770
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
771
- o_rc = FAIL ;
771
+ o_rc = PRD_SCANCOM_FAILURE ;
772
772
break ;
773
773
}
774
774
std ::map < uint8_t ,bool > bitList = __nvdimmGetActiveBits ( data );
@@ -899,7 +899,7 @@ uint32_t __adjustThreshold( STEP_CODE_DATA_STRUCT & io_sc,
899
899
PRDF_ERR ( PRDF_FUNC "Failed to read Warning Threshold Reg. HUID: "
900
900
"0x%08x" , getHuid (i_dimm ) );
901
901
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
902
- o_rc = FAIL ;
902
+ o_rc = PRD_SCANCOM_FAILURE ;
903
903
break ;
904
904
}
905
905
@@ -912,7 +912,7 @@ uint32_t __adjustThreshold( STEP_CODE_DATA_STRUCT & io_sc,
912
912
PRDF_ERR ( PRDF_FUNC "Failed to read Error Threshold Reg. HUID: "
913
913
"0x%08x" , getHuid (i_dimm ) );
914
914
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
915
- o_rc = FAIL ;
915
+ o_rc = PRD_SCANCOM_FAILURE ;
916
916
break ;
917
917
}
918
918
@@ -941,7 +941,7 @@ uint32_t __adjustThreshold( STEP_CODE_DATA_STRUCT & io_sc,
941
941
PRDF_ERR ( PRDF_FUNC "Failed to clear Set Event Notification "
942
942
"Cmd Reg. HUID: 0x%08x" , getHuid (i_dimm ) );
943
943
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
944
- o_rc = FAIL ;
944
+ o_rc = PRD_SCANCOM_FAILURE ;
945
945
break ;
946
946
}
947
947
@@ -955,7 +955,7 @@ uint32_t __adjustThreshold( STEP_CODE_DATA_STRUCT & io_sc,
955
955
PRDF_ERR ( PRDF_FUNC "Failed to read Set Event Notification "
956
956
"Status Reg. HUID: 0x%08x" , getHuid (i_dimm ) );
957
957
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
958
- o_rc = FAIL ;
958
+ o_rc = PRD_SCANCOM_FAILURE ;
959
959
break ;
960
960
}
961
961
std ::map < uint8_t ,bool > bitList = __nvdimmGetActiveBits ( notifStat );
@@ -991,7 +991,7 @@ uint32_t __adjustThreshold( STEP_CODE_DATA_STRUCT & io_sc,
991
991
PRDF_ERR ( PRDF_FUNC "Failed to write Warning Threshold Reg. "
992
992
"HUID: 0x%08x" , getHuid (i_dimm ) );
993
993
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
994
- o_rc = FAIL ;
994
+ o_rc = PRD_SCANCOM_FAILURE ;
995
995
break ;
996
996
}
997
997
@@ -1004,7 +1004,7 @@ uint32_t __adjustThreshold( STEP_CODE_DATA_STRUCT & io_sc,
1004
1004
PRDF_ERR ( PRDF_FUNC "Failed to write Set Event Notification "
1005
1005
"Cmd Reg. HUID: 0x%08x" , getHuid (i_dimm ) );
1006
1006
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
1007
- o_rc = FAIL ;
1007
+ o_rc = PRD_SCANCOM_FAILURE ;
1008
1008
break ;
1009
1009
}
1010
1010
@@ -1017,7 +1017,7 @@ uint32_t __adjustThreshold( STEP_CODE_DATA_STRUCT & io_sc,
1017
1017
PRDF_ERR ( PRDF_FUNC "Failed to read Set Event Notification "
1018
1018
"Status Reg. HUID: 0x%08x" , getHuid (i_dimm ) );
1019
1019
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
1020
- o_rc = FAIL ;
1020
+ o_rc = PRD_SCANCOM_FAILURE ;
1021
1021
break ;
1022
1022
}
1023
1023
bitList = __nvdimmGetActiveBits ( notifStat );
@@ -1084,7 +1084,7 @@ uint32_t __analyzeWarningThrStatusReg(STEP_CODE_DATA_STRUCT & io_sc,
1084
1084
PRDF_ERR ( PRDF_FUNC "Failed to read Warning Threshold Status Reg. "
1085
1085
"HUID: 0x%08x" , getHuid (i_dimm ) );
1086
1086
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
1087
- o_rc = FAIL ;
1087
+ o_rc = PRD_SCANCOM_FAILURE ;
1088
1088
break ;
1089
1089
}
1090
1090
std ::map < uint8_t ,bool > bitList = __nvdimmGetActiveBits ( data );
@@ -1298,7 +1298,7 @@ uint32_t __deassertEventN( TargetHandle_t i_dimm )
1298
1298
PRDF_ERR ( PRDF_FUNC "Failed to read NVDIMM_MGT_CMD1. "
1299
1299
"HUID: 0x%08x" , getHuid (i_dimm ) );
1300
1300
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
1301
- o_rc = FAIL ;
1301
+ o_rc = PRD_SCANCOM_FAILURE ;
1302
1302
break ;
1303
1303
}
1304
1304
@@ -1313,7 +1313,7 @@ uint32_t __deassertEventN( TargetHandle_t i_dimm )
1313
1313
PRDF_ERR ( PRDF_FUNC "Failed to write NVDIMM_MGT_CMD1. "
1314
1314
"HUID: 0x%08x" , getHuid (i_dimm ) );
1315
1315
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
1316
- o_rc = FAIL ;
1316
+ o_rc = PRD_SCANCOM_FAILURE ;
1317
1317
break ;
1318
1318
}
1319
1319
@@ -1378,6 +1378,20 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
1378
1378
PRDF_ERR ( PRDF_FUNC "Failed to read Module Health Register. "
1379
1379
"HUID: 0x%08x" , getHuid (dimm ) );
1380
1380
PRDF_COMMIT_ERRL ( errl , ERRL_ACTION_REPORT );
1381
+
1382
+ // If we got a failure reading one of the NVDIMM registers,
1383
+ // add a signature and make the log predictive.
1384
+ __addSignature ( io_sc , i_chip -> getTrgt (), errFound ,
1385
+ PRDFSIG_NvdimmReadFail );
1386
+ errFound = true;
1387
+ io_sc .service_data -> SetThresholdMaskId (0 );
1388
+
1389
+ // Callout NVDIMM
1390
+ io_sc .service_data -> SetCallout ( dimm , MRU_MED , NO_GARD );
1391
+
1392
+ // Send message to PHYP that save/restore may work
1393
+ l_rc = PlatServices ::nvdimmNotifyProtChange ( dimm ,
1394
+ NVDIMM ::NVDIMM_RISKY_HW_ERROR );
1381
1395
continue ;
1382
1396
}
1383
1397
std ::map < uint8_t ,bool > bitList = __nvdimmGetActiveBits ( data );
@@ -1387,17 +1401,33 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
1387
1401
{
1388
1402
// Analyze Health Status0 Reg, Health Status1 Reg,
1389
1403
// and Error Threshold Status Reg
1390
- l_rc = __analyzeHealthStatus0Reg ( io_sc , dimm , errFound );
1391
- if ( SUCCESS != l_rc ) continue ;
1392
- l_rc = __analyzeHealthStatus1Reg ( io_sc , dimm , errFound );
1393
- if ( SUCCESS != l_rc ) continue ;
1394
- bool esTempErr = false;
1395
- l_rc = __analyzeErrorThrStatusReg (io_sc , dimm , errFound , esTempErr );
1396
- if ( SUCCESS != l_rc ) continue ;
1404
+ bool esTmpErr = false;
1405
+ uint32_t l_rcStat0 = SUCCESS ;
1406
+ uint32_t l_rcStat1 = SUCCESS ;
1407
+ uint32_t l_rcErrTh = SUCCESS ;
1408
+ l_rcStat0 = __analyzeHealthStatus0Reg ( io_sc , dimm , errFound );
1409
+ l_rcStat1 = __analyzeHealthStatus1Reg ( io_sc , dimm , errFound );
1410
+ l_rcErrTh = __analyzeErrorThrStatusReg ( io_sc , dimm , errFound ,
1411
+ esTmpErr );
1412
+
1413
+ if ( PRD_SCANCOM_FAILURE == l_rcStat0 ||
1414
+ PRD_SCANCOM_FAILURE == l_rcStat1 ||
1415
+ PRD_SCANCOM_FAILURE == l_rcErrTh )
1416
+ {
1417
+ // If we got a failure reading one of the NVDIMM registers,
1418
+ // add a signature and make the log predictive.
1419
+ __addSignature ( io_sc , i_chip -> getTrgt (), errFound ,
1420
+ PRDFSIG_NvdimmReadFail );
1421
+ errFound = true;
1422
+ io_sc .service_data -> SetThresholdMaskId (0 );
1423
+
1424
+ // Callout NVDIMM
1425
+ io_sc .service_data -> SetCallout ( dimm , MRU_MED , NO_GARD );
1426
+ }
1397
1427
1398
1428
// If we hit an ES temperature error and have not yet hit threshold,
1399
1429
// then keep the log hidden.
1400
- if ( esTempErr && !io_sc .service_data -> IsAtThreshold () ) continue ;
1430
+ if ( esTmpErr && !io_sc .service_data -> IsAtThreshold () ) continue ;
1401
1431
1402
1432
// If we didn't find any error, then keep the log hidden.
1403
1433
if ( !errFound )
@@ -1426,7 +1456,22 @@ int32_t AnalyzeNvdimmHealthStatRegs( ExtensibleChip * i_chip,
1426
1456
else if ( bitList .count (1 ) )
1427
1457
{
1428
1458
l_rc = __analyzeWarningThrStatusReg ( io_sc , dimm , errFound );
1429
- if ( SUCCESS != l_rc ) continue ;
1459
+ if ( PRD_SCANCOM_FAILURE == l_rc )
1460
+ {
1461
+ // If we got a failure reading one of the NVDIMM registers,
1462
+ // add a signature and make the log predictive.
1463
+ __addSignature ( io_sc , i_chip -> getTrgt (), errFound ,
1464
+ PRDFSIG_NvdimmReadFail );
1465
+ errFound = true;
1466
+ io_sc .service_data -> SetThresholdMaskId (0 );
1467
+
1468
+ // Callout NVDIMM
1469
+ io_sc .service_data -> SetCallout ( dimm , MRU_MED , NO_GARD );
1470
+
1471
+ // Send message to PHYP that save/restore may work
1472
+ l_rc = PlatServices ::nvdimmNotifyProtChange ( dimm ,
1473
+ NVDIMM ::NVDIMM_RISKY_HW_ERROR );
1474
+ }
1430
1475
1431
1476
if ( !errFound )
1432
1477
{
0 commit comments