Skip to content

Commit

Permalink
Apply MC_HANG timeout workaround to all processors
Browse files Browse the repository at this point in the history
When we initially added this workaround, for whatever reason, we
specified the master proc only. It needs to be applied to all
processors in the system.

Change-Id: I5840f2b0670f1790393a385f045ee656ad76bef8
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/83594
Reviewed-by: Michael Baiocchi <mbaiocch@us.ibm.com>
Reviewed-by: Zachary Clark <zach@ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M Crowell <dcrowell@us.ibm.com>
  • Loading branch information
crgeddes authored and dcrowell77 committed Sep 13, 2019
1 parent c049efe commit 8ab48e7
Showing 1 changed file with 45 additions and 59 deletions.
104 changes: 45 additions & 59 deletions src/usr/isteps/istep10/call_proc_chiplet_scominit.C
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void* call_proc_chiplet_scominit( void *io_pArgs )
IStepError l_stepError;

TRACFCOMP(g_trac_isteps_trace, ENTER_MRK"call_proc_chiplet_scominit entry" );

do{

if (!INITSERVICE::isSMPWrapConfig())
Expand Down Expand Up @@ -109,71 +109,57 @@ void* call_proc_chiplet_scominit( void *io_pArgs )
HWPF_COMP_ID);
}
}

// TODO RTC: 213932 Remove workaround to ignore MC channel hang

#ifdef CONFIG_AXONE_BRING_UP
TARGETING::TargetHandleList l_cpuTargetList;
getAllChips(l_cpuTargetList, TYPE_PROC);

//
// Identify the master processor
//
TARGETING::Target * l_masterProc = nullptr;
TARGETING::Target * l_masterNode = nullptr;
const bool l_onlyFunctional = true; // Make sure masterproc is functional
l_err = TARGETING::targetService().queryMasterProcChipTargetHandle(
l_masterProc,
l_masterNode,
l_onlyFunctional);

if(l_err)
// Apply workaround to remove MC hang timeouts because they
// are forcing false negatives to all proc functional chips
for (const auto & l_procChip: l_cpuTargetList)
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"ERROR : call_proc_chiplet_scominit: "
"queryMasterProcChipTargetHandle() returned PLID=0x%x",
l_err->plid() );
// Create IStep error log and cross reference error that occurred
l_stepError.addErrorDetails(l_err);
// Commit error
errlCommit( l_err, HWPF_COMP_ID );
break;
}

TARGETING::TargetHandleList l_miTargetList;
TARGETING::getChildAffinityTargets( l_miTargetList, l_masterProc, CLASS_UNIT, TYPE_MI );

const uint64_t MCS_TIMEOUT_CONTROL_REG = 0x501081B;

for(const auto & l_mi : l_miTargetList)
{
uint64_t l_mcsTimeoutControlValue;
size_t l_regSize = sizeof(l_mcsTimeoutControlValue);
l_err = deviceRead(l_mi, &l_mcsTimeoutControlValue, l_regSize,
DEVICE_SCOM_ADDRESS(MCS_TIMEOUT_CONTROL_REG));

// Clear bit 33 and re-write the scom register with new value.
// When this bit is cleared it allows extra time for gemini card
// before a channel hang is declared
l_mcsTimeoutControlValue &= ~(1UL << 30);

l_err = deviceWrite(l_mi, &l_mcsTimeoutControlValue, l_regSize,
DEVICE_SCOM_ADDRESS(MCS_TIMEOUT_CONTROL_REG));

if(l_err)
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"ERROR : call_proc_chiplet_scominit: "
"deviceWrite on DEVICE_SCOM_ADDRESS MCS_TIMEOUT_CONTROL_REG returned PLID=0x%x",
l_err->plid() );
// Create IStep error log and cross reference error that occurred
l_stepError.addErrorDetails(l_err);
// Commit error
errlCommit( l_err, HWPF_COMP_ID );
break;
}
TARGETING::TargetHandleList l_miTargetList;
TARGETING::getChildAffinityTargets( l_miTargetList, l_procChip, CLASS_UNIT, TYPE_MI );

const uint64_t MCS_TIMEOUT_CONTROL_REG = 0x501081B;

for(const auto & l_mi : l_miTargetList)
{
uint64_t l_mcsTimeoutControlValue;
size_t l_regSize = sizeof(l_mcsTimeoutControlValue);
l_err = deviceRead(l_mi, &l_mcsTimeoutControlValue, l_regSize,
DEVICE_SCOM_ADDRESS(MCS_TIMEOUT_CONTROL_REG));

// Clear bit 33 and re-write the scom register with new value.
// When this bit is cleared it allows extra time for gemini card
// before a channel hang is declared
l_mcsTimeoutControlValue &= ~(1UL << 30);

l_err = deviceWrite(l_mi, &l_mcsTimeoutControlValue, l_regSize,
DEVICE_SCOM_ADDRESS(MCS_TIMEOUT_CONTROL_REG));

if(l_err)
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"ERROR : call_proc_chiplet_scominit: "
"deviceWrite on DEVICE_SCOM_ADDRESS MCS_TIMEOUT_CONTROL_REG returned PLID=0x%x",
l_err->plid() );
// error will be handled below
break;
}
}

if(l_err)
{
// Create IStep error log and cross reference error that occurred
l_stepError.addErrorDetails(l_err);
// Commit error
errlCommit( l_err, HWPF_COMP_ID );
break;
}
}
#endif

}while(0);

TRACFCOMP(g_trac_isteps_trace, EXIT_MRK"call_proc_chiplet_scominit exit" );
Expand Down

0 comments on commit 8ab48e7

Please sign in to comment.