Skip to content

Commit

Permalink
Clear INT_CQ related FIRs after resetting INTRRP logic in HB
Browse files Browse the repository at this point in the history
With multi-node systems during the MPIPL steps up until the
re-init of the intrrp the nodes will be running completely
independently. Because of this we have seen errors where node C
is still sending hypervisor traffic to node A while node A has
already torn down its interrupt logic. These errors are irrelevant
as we reset the interrupt logic anyways.

CQ: SW448121
Change-Id: I910ab80da662b6ad4fb4fbc680dbf2957d2736da
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/68182
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Reviewed-by: Dean Sanner <dsanner@us.ibm.com>
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
  • Loading branch information
crgeddes authored and wghoffa committed Nov 1, 2018
1 parent 6292186 commit 923654e
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 0 deletions.
85 changes: 85 additions & 0 deletions src/usr/intr/intrrp.C
Original file line number Diff line number Diff line change
Expand Up @@ -239,12 +239,97 @@ errlHndl_t IntrRp::resetIntpForMpipl()
}
}

// Clear out any interrupt related FIRs that might have popped during the MPIPL sequence
err = clearAllIntFirs();

}while(0);

return err;
}


// Clears the interrupt-unit error/FIR registers on every processor chip
// returned by getAllChips(TYPE_PROC). Stops at the first scom failure and
// hands that error log back to the caller; returns nullptr when every
// scom operation succeeded.
errlHndl_t IntrRp::clearAllIntFirs()
{
    // According to the scom definition, simply reading any of these
    // registers clears it, so no explicit write is needed for them.
    const uint64_t l_readToClearAddrs[] =
    {
        PU_INT_PC_ERR0_WOF,      PU_INT_PC_ERR0_FATAL,
        PU_INT_PC_ERR0_RECOV,    PU_INT_PC_ERR0_INFO,
        PU_INT_PC_ERR1_WOF,      PU_INT_PC_ERR1_FATAL,
        PU_INT_PC_ERR1_RECOV,    PU_INT_PC_ERR1_INFO,
        PU_INT_PC_VPC_WOF_ERR,   PU_INT_PC_VPC_FATAL_ERR,
        PU_INT_PC_VPC_RECOV_ERR, PU_INT_PC_VPC_INFO_ERR,
        PU_INT_VC_WOF_ERR_G0,    PU_INT_VC_WOF_ERR_G1,
        PU_INT_VC_FATAL_ERR_G1,  PU_INT_VC_FATAL_ERR_G0,
        PU_INT_VC_RECOV_ERR_G0,  PU_INT_VC_RECOV_ERR_G1,
        PU_INT_VC_INFO_ERR_G0,   PU_INT_VC_INFO_ERR_G1
    };

    errlHndl_t l_errl = nullptr;

    // Scratch buffer shared by every scom read and write below
    uint64_t l_scomData;
    size_t l_scomSize = sizeof(l_scomData);

    // Collect the processor chips on this node that need their FIRs cleared
    TARGETING::TargetHandleList l_procList;
    getAllChips(l_procList, TYPE_PROC);

    for (const auto & l_proc : l_procList)
    {
        // Touch each "read to clear" register; the value read is discarded
        for (const uint64_t l_addr : l_readToClearAddrs)
        {
            l_errl = deviceRead(l_proc,
                                &l_scomData,
                                l_scomSize,
                                DEVICE_SCOM_ADDRESS(l_addr) );

            // One failed scom most likely means the rest will fail too,
            // so give up immediately
            if (l_errl)
            {
                return l_errl;
            }
        }

        // INT_CQ_WOF and INT_CQ_FIR are not read-to-clear; they are
        // cleared by writing 0x0 to them.
        l_scomData = 0x0;

        l_errl = deviceWrite(l_proc,
                             &l_scomData,
                             l_scomSize,
                             DEVICE_SCOM_ADDRESS(PU_INT_CQ_WOF) );
        if (l_errl)
        {
            // Same reasoning as above: stop on the first scom failure
            return l_errl;
        }

        l_errl = deviceWrite(l_proc,
                             &l_scomData,
                             l_scomSize,
                             DEVICE_SCOM_ADDRESS(PU_INT_CQ_FIR) );
        if (l_errl)
        {
            // Same reasoning as above: stop on the first scom failure
            return l_errl;
        }
    }

    // Still nullptr here: every scom on every processor succeeded
    return l_errl;
}

errlHndl_t setHbModeOnP3PCReg()
{
errlHndl_t l_err = nullptr;
Expand Down
11 changes: 11 additions & 0 deletions src/usr/intr/intrrp.H
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,17 @@ namespace INTR
*/
void printLSIInfo() const;

/**
* @brief Clear all interrupt related FIRs
*
* This is needed because in multi-node systems the nodes
* are not in concert during the MPIPL steps, so outstanding
* traffic from one node might cause FIRs to pop on another
* because they were at different points of the MPIPL. This function
* clears any FIRs that might have occurred to allow us to start fresh.
*
* @return error log handle; nullptr if every scom operation succeeded,
*         otherwise the error from the first scom that failed
*/
errlHndl_t clearAllIntFirs();
};
}; // INTR namespace

Expand Down

0 comments on commit 923654e

Please sign in to comment.