Skip to content

Commit

Permalink
OCC Centaur: Check for channel checkstop
Browse files Browse the repository at this point in the history
Change-Id: I2df9675d655b0391b249e49f7fc036788268e36c
RTC: 191164
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/57280
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
Reviewed-by: Andres A. Lugo-Reyes <aalugore@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
  • Loading branch information
dgilbert999 authored and marthabroyles committed Apr 20, 2018
1 parent 4f49f63 commit 61cd385
Show file tree
Hide file tree
Showing 8 changed files with 350 additions and 138 deletions.
40 changes: 32 additions & 8 deletions src/include/registers/mcs_firmware_registers.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ typedef union mcfgpr
} mcfgpr_t;


typedef union mcsmode0
typedef union mcmcicfg
{
uint64_t value;
struct
Expand All @@ -67,16 +67,20 @@ typedef union mcsmode0
// Bit-field view of the 64-bit MCICFG register image. Only the
// disable_channel_fail bit is named; every other bit is padding.
// The two branches list the same fields in opposite order so that, assuming
// the compiler allocates bit-fields MSB-first when _BIG_ENDIAN is defined and
// LSB-first otherwise, disable_channel_fail lands on the same register bit
// (47 counted from the most significant bit) in both builds.
struct
{
#ifdef _BIG_ENDIAN
    uint64_t dontCare0 : 47;
    uint64_t disable_channel_fail : 1;
    uint64_t dontCare1 : 16;
#else
    uint64_t dontCare1 : 16;
    uint64_t disable_channel_fail : 1;   // was "; 1;" - a syntax error
    uint64_t dontCare0 : 47;
#endif
} fields;
} mcsmode0_t;
} mcmcicfg_t;



typedef union mcifir
typedef union mcchifir
{
uint64_t value;
struct
Expand All @@ -92,11 +96,31 @@ typedef union mcifir
// Bit-field view of the 64-bit channel FIR (MCCHIFIR) register image.
// Named fields are the individual FIR bits; dontCareN fields are padding for
// bits that are not examined. The field widths in each branch add up to
// exactly 64 bits.
// The little-endian branch lists the same fields in reverse order so that,
// assuming the compiler allocates bit-fields MSB-first when _BIG_ENDIAN is
// defined and LSB-first otherwise, every named field lands on the same
// register bit in both builds and code that reads the fields compiles on
// either kind of host.
struct
{
#ifdef _BIG_ENDIAN
    uint64_t fir_scom_wr_perr : 1;
    uint64_t fir_scom_cfg_perr : 1;
    uint64_t fir_dsrc_no_forward_progress : 1;
    uint64_t fir_dsrc_perf_degrad : 1;
    uint64_t fir_dmi_channel_fail : 1;
    uint64_t fir_channel_init_timeout : 1;
    uint64_t fir_channel_interlock_err : 1;
    uint64_t dontCare0 : 5;
    uint64_t fir_replay_buffer_ue : 1;
    uint64_t dontCare1 : 1;
    uint64_t fir_replay_buffer_overrun : 1;
    uint64_t fir_df_sm_perr : 1;
    uint64_t fir_cen_checkstop : 1;
    uint64_t dontCare2 : 15;
    uint64_t fir_dsff_tag_overrun : 1;
    uint64_t dontCare3 : 7;
    uint64_t fir_dsff_mca_async_cmd_error : 2;
    uint64_t fir_dsff_seq_error : 1;
    uint64_t dontCare4 : 18;
    uint64_t fir_dsff_timeout : 1;
    uint64_t dontCare5 : 2;
#else
    uint64_t dontCare5 : 2;
    uint64_t fir_dsff_timeout : 1;
    uint64_t dontCare4 : 18;
    uint64_t fir_dsff_seq_error : 1;
    uint64_t fir_dsff_mca_async_cmd_error : 2;
    uint64_t dontCare3 : 7;
    uint64_t fir_dsff_tag_overrun : 1;
    uint64_t dontCare2 : 15;
    uint64_t fir_cen_checkstop : 1;
    uint64_t fir_df_sm_perr : 1;
    uint64_t fir_replay_buffer_overrun : 1;
    uint64_t dontCare1 : 1;
    uint64_t fir_replay_buffer_ue : 1;
    uint64_t dontCare0 : 5;
    uint64_t fir_channel_interlock_err : 1;
    uint64_t fir_channel_init_timeout : 1;
    uint64_t fir_dmi_channel_fail : 1;
    uint64_t fir_dsrc_perf_degrad : 1;
    uint64_t fir_dsrc_no_forward_progress : 1;
    uint64_t fir_scom_cfg_perr : 1;
    uint64_t fir_scom_wr_perr : 1;
#endif // _BIG_ENDIAN
} fields;
} mcifir_t;
} mcchifir_t;

#endif
25 changes: 20 additions & 5 deletions src/include/registers/p9_misc_scom_addresses.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,24 @@
#define MCS_3_MCRSVDE 0x0301088E
#define MCS_3_MCRSVDF 0x0301088F

// MC Primary Memory Configuration Register (MCFGP)
#define MCS_0_MCFGP 0x0501080A
#define MCS_1_MCFGP 0x0501088A
#define MCS_2_MCFGP 0x0301080A
#define MCS_3_MCFGP 0x0301088A
// MC channel FIR and configuration registers.
// One SCOM address per memory channel (channels 0-7) for the CHI FIR register.
#define MCP_CHAN0_CHI_FIR 0x07010900
#define MCP_CHAN1_CHI_FIR 0x07010940
#define MCP_CHAN2_CHI_FIR 0x07010980
#define MCP_CHAN3_CHI_FIR 0x070109C0
#define MCP_CHAN4_CHI_FIR 0x08010900
#define MCP_CHAN5_CHI_FIR 0x08010940
#define MCP_CHAN6_CHI_FIR 0x08010980
#define MCP_CHAN7_CHI_FIR 0x080109C0

// One SCOM address per memory channel (channels 0-7) for the MCICFG1Q register.
#define MCP_CHAN0_CHI_MCICFG1Q 0x0701090E
#define MCP_CHAN1_CHI_MCICFG1Q 0x0701094E
#define MCP_CHAN2_CHI_MCICFG1Q 0x0701098E
#define MCP_CHAN3_CHI_MCICFG1Q 0x070109CE
#define MCP_CHAN4_CHI_MCICFG1Q 0x0801090E
#define MCP_CHAN5_CHI_MCICFG1Q 0x0801094E
#define MCP_CHAN6_CHI_MCICFG1Q 0x0801098E
#define MCP_CHAN7_CHI_MCICFG1Q 0x080109CE


#endif
9 changes: 7 additions & 2 deletions src/occ_405/cent/centaur_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,9 +491,14 @@ bool check_centaur_checkstop(memory_control_task_t * i_memControlTask )

commitErrl(&l_err);

return TRUE; // a centaur channel checkstop error occured
return FALSE; // error was not a channel checkstop
}
return FALSE; // No centaur channel checkstop errors
else
{
// Remove the centaur sensor and all dimm sensors behind it.
cent_chan_checkstop(cent);
}
return TRUE; // Centaur channel checkstop

}

Expand Down
45 changes: 35 additions & 10 deletions src/occ_405/cent/centaur_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,26 @@ uint8_t G_centaur_nest_lfir6 = 0;
//number of SC polls to wait between i2c recovery attempts
#define CENT_SC_MAX_INTERVAL 256

//determine scom address of MCIFIR register for given Centaur n
#define MCS0_MCIFIR_N(n) \
( (n<4)? (MCS0_MCIFIR + ((MCS1_MCIFIR - MCS0_MCIFIR) * (n))) : (MCS4_MCIFIR + ((MCS5_MCIFIR - MCS4_MCIFIR) * (n-4))) )
// React to a channel checkstop on centaur i_cent: clear its bit in the
// presence bitmap and zero the dimm temperature sensor byte that belongs to
// it, then trace the updated bitmaps. Does nothing when the centaur is not
// currently marked present.
void cent_chan_checkstop(uint32_t i_cent)
{
    // Only a centaur that is still marked present needs to be removed
    if(!(CENTAUR_PRESENT(i_cent)))
    {
        return;
    }

    // Drop the checkstopped centaur from the presence bitmap
    G_present_centaurs &= ~(CENTAUR_BY_MASK(i_cent));

    // Disable every dimm temperature sensor behind this centaur
    G_dimm_enabled_sensors.bytes[i_cent] = 0;

    TRAC_IMP("Channel checkstop detected on Centaur[%d] G_present_centaurs[0x%08X]",
             i_cent,
             G_present_centaurs);

    TRAC_IMP("Updated bitmap of enabled dimm temperature sensors: 0x%08X %08X",
             G_dimm_enabled_sensors.words[0],
             G_dimm_enabled_sensors.words[1]);
}

void cent_recovery(uint32_t i_cent)
{
Expand Down Expand Up @@ -255,9 +272,13 @@ void cent_recovery(uint32_t i_cent)
G_cent_scom_gpe_parms.error.rc) &&
(!(L_cent_callouts & l_cent_mask)))
{
// Check if the centaur has a channel checkstop. If it does, then do not
// log any errors
if(G_cent_scom_gpe_parms.error.rc != CENTAUR_CHANNEL_CHECKSTOP)
// Check if the centaur has a channel checkstop. If it does then remove the centaur
// from the enabled sensor bit map and do not log any errors
if(G_cent_scom_gpe_parms.error.rc == CENTAUR_CHANNEL_CHECKSTOP)
{
cent_chan_checkstop(l_prev_cent);
}
else // Make error log for inband scom errors
{
//Mark the centaur as being called out
L_cent_callouts |= l_cent_mask;
Expand Down Expand Up @@ -641,9 +662,13 @@ void centaur_data( void )
//(as long as the request was scheduled).
if(!async_request_completed(&l_centaur_data_ptr->gpe_req.request) || l_parms->error.rc )
{
// Check if the centaur has a channel checkstop. If it does, then do not
// log any errors
if(G_cent_scom_gpe_parms.error.rc != CENTAUR_CHANNEL_CHECKSTOP)
// Check if the centaur has a channel checkstop. If it does then do not
// log any errors, but remove the centaur from the config
if(l_parms->error.rc == CENTAUR_CHANNEL_CHECKSTOP)
{
cent_chan_checkstop(l_centaur_data_ptr->prev_centaur);
}
else // log the error if it was not a CENTAUR_CHANNEL_CHECKSTOP
{
//log an error the first time this happens but keep on running.
//eventually, we will timeout on the dimm & centaur temps not being updated
Expand Down Expand Up @@ -868,7 +893,7 @@ void centaur_data( void )
}
while(0);

//handle centaur i2c recovery requests and centaur workaround - Needed for P9??
//handle centaur i2c recovery requests and centaur workaround.
if(CENTAUR_PRESENT(l_centaur_data_ptr->current_centaur))
{
cent_recovery(l_centaur_data_ptr->current_centaur);
Expand Down
6 changes: 5 additions & 1 deletion src/occ_405/cent/centaur_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,11 @@ void cent_recovery(uint32_t i_cent);
//associated with the specified OCC centaur id.
CentaurMemData * cent_get_centaur_data_ptr( const uint8_t i_centaur_id );


// Create the centaur configuration object
uint32_t centaur_configuration_create( CentaurConfiguration_t * i_centaurConfiguration );

// Remove centaur from enabled sensor list due to channel checkstop
void cent_chan_checkstop(uint32_t i_cent);

#endif //_CENTAUR_DATA_H

7 changes: 7 additions & 0 deletions src/occ_gpe1/gpe_centaur.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,11 @@ void gpe_scom_centaur(CentaurConfiguration_t* i_config,
int centaur_get_mem_data(CentaurConfiguration_t* i_config,
CentaurGetMemDataParms_t* i_parms);

/**
* Check for channel checkstop
* @param[in] i_centaur The ordinal centaur number
* @return 0 if no channel checkstop was found, otherwise a non-zero return code
*/
int check_channel_chkstp(unsigned int i_centaur);

#endif
103 changes: 80 additions & 23 deletions src/occ_gpe1/gpe_centaur_configuration.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,28 +50,28 @@ const uint32_t MCFGPR[OCCHW_NCENTAUR] =
MCS_3_MCRSVDF
};

//const uint32_t MCSMODE0[OCCHW_NCENTAUR / 2] __attribute__((section(".sdata2"))) =
//{
// MCS_0_MCMODE0,
// MCS_1_MCMODE0,
// MCS_2_MCMODE0,
// MCS_3_MCMODE0
//};

//const uint32_t MCFIR[OCCHW_NCENTAUR / 2] __attribute((section(".sdata2"))) =
//{
// MCS_0_MCFIR,
// MCS_1_MCFIR,
// MCS_2_MCFIR,
// MCS_3_MCFIR
//};

const uint32_t MCFGP[OCCHW_NCENTAUR/2] =
const uint32_t MCCHIFIR[OCCHW_NCENTAUR] =
{
MCS_0_MCFGP,
MCS_1_MCFGP,
MCS_2_MCFGP,
MCS_3_MCFGP
MCP_CHAN0_CHI_FIR,
MCP_CHAN1_CHI_FIR,
MCP_CHAN2_CHI_FIR,
MCP_CHAN3_CHI_FIR,
MCP_CHAN4_CHI_FIR,
MCP_CHAN5_CHI_FIR,
MCP_CHAN6_CHI_FIR,
MCP_CHAN7_CHI_FIR
};

// SCOM address of the MCICFG1Q register for each memory channel, indexed by
// ordinal centaur number. check_channel_chkstp() reads the entry for a channel
// after it finds a FIR bit set on that channel.
const uint32_t MCMCICFG1Q[OCCHW_NCENTAUR] =
{
MCP_CHAN0_CHI_MCICFG1Q,
MCP_CHAN1_CHI_MCICFG1Q,
MCP_CHAN2_CHI_MCICFG1Q,
MCP_CHAN3_CHI_MCICFG1Q,
MCP_CHAN4_CHI_MCICFG1Q,
MCP_CHAN5_CHI_MCICFG1Q,
MCP_CHAN6_CHI_MCICFG1Q,
MCP_CHAN7_CHI_MCICFG1Q
};

///////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -222,8 +222,15 @@ int gpe_centaur_configuration_create(CentaurConfiguration_t* o_config)
{
uint64_t val64;

//FIR bits have changed from p8 TODO Marc Golub to provide what
//firchecks need to be done for P9
// check for channel checkstop
rc = check_channel_chkstp(i);
if (rc)
{
// If scom failed OR there is a channel checkstop then
// Centaur is not usable.
rc = 0;
continue;
}

// Verify that inband scom has been setup. If not then
// assume the centaur is either non-existent or not configured.
Expand Down Expand Up @@ -420,3 +427,53 @@ int gpe_centaur_configuration_create(CentaurConfiguration_t* o_config)
return rc;
}

// Check one memory channel for a channel checkstop.
//
// Reads the channel's MCCHIFIR register and tests the FIR bits listed below.
// If any of them is set, the FIR value is traced, the channel's MCICFG1Q
// register is read and traced, and CENTAUR_CHANNEL_CHECKSTOP is returned.
//
// i_centaur: ordinal centaur number, used as the index into the MCCHIFIR and
//            MCMCICFG1Q address tables (not range-checked here).
// Returns:   0 when none of the tested FIR bits is set,
//            CENTAUR_CHANNEL_CHECKSTOP when at least one is set,
//            or the non-zero getscom_abs() return code if either read fails.
int check_channel_chkstp(unsigned int i_centaur)
{
    mcchifir_t l_fir;
    mcmcicfg_t l_cfg;

    int rc = getscom_abs(MCCHIFIR[i_centaur], &(l_fir.value));
    if (rc)
    {
        PK_TRACE("MCCHIFIR scom failed. rc = %d",rc);
        return rc;
    }

    // Any one of these FIR bits marks the channel as failed
    const int l_channel_failed =
        l_fir.fields.fir_dsrc_no_forward_progress ||
        l_fir.fields.fir_dmi_channel_fail ||
        l_fir.fields.fir_channel_init_timeout ||
        l_fir.fields.fir_channel_interlock_err ||
        l_fir.fields.fir_replay_buffer_ue ||
        l_fir.fields.fir_replay_buffer_overrun ||
        l_fir.fields.fir_df_sm_perr ||
        l_fir.fields.fir_cen_checkstop ||
        l_fir.fields.fir_dsff_tag_overrun ||
        l_fir.fields.fir_dsff_mca_async_cmd_error ||
        l_fir.fields.fir_dsff_seq_error ||
        l_fir.fields.fir_dsff_timeout;

    if (!l_channel_failed)
    {
        // Channel is healthy; rc is still 0 from the successful read
        return rc;
    }

    PK_TRACE("MCCHIFIR: %08x%08x for channel %d",
             l_fir.words.high_order,
             l_fir.words.low_order,
             i_centaur);

    // Read the channel configuration so it can be traced next to the FIR
    rc = getscom_abs(MCMCICFG1Q[i_centaur], &(l_cfg.value));
    if (rc)
    {
        PK_TRACE("MCMCICFG scom failed. rc = %d",rc);
        return rc;
    }

    PK_TRACE("MCMCICFG1Q %08x%08x",
             l_cfg.words.high_order,
             l_cfg.words.low_order);

    return CENTAUR_CHANNEL_CHECKSTOP;
}

0 comments on commit 61cd385

Please sign in to comment.