From e3c65ee89e76e3de0548abc9df4c7f1768db9402 Mon Sep 17 00:00:00 2001 From: mbroyles Date: Mon, 24 Jun 2019 12:24:46 -0500 Subject: [PATCH] Explorer internal sensor workaround Fixes for 16 OCMB support Change-Id: Ide04de3c2b1bb09654890d3334530cb20e327e83 RTC: 211961 Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/80920 Tested-by: FSP CI Jenkins Reviewed-by: Douglas R Gilbert Reviewed-by: Christopher J Cain --- src/occ_405/amec/amec_controller.c | 236 ++++++++++---- src/occ_405/amec/amec_controller.h | 4 +- src/occ_405/amec/amec_data.c | 92 +++++- src/occ_405/amec/amec_freq.c | 24 ++ src/occ_405/amec/amec_freq.h | 5 +- src/occ_405/amec/amec_health.c | 126 +++++++- src/occ_405/amec/amec_init.c | 15 + src/occ_405/amec/amec_sensors_ocmb.c | 364 +++++++++++++--------- src/occ_405/amec/amec_sys.h | 27 +- src/occ_405/cent/centaur_data.c | 49 ++- src/occ_405/cent/centaur_data.h | 1 + src/occ_405/cent/ocmb_data.c | 6 +- src/occ_405/cmdh/cmdh_fsp_cmds.c | 11 +- src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c | 87 +++++- src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h | 10 +- src/occ_405/occbuildname.c | 2 +- src/occ_405/sensor/sensor_enum.h | 3 + src/occ_405/sensor/sensor_info.c | 3 + src/occ_405/sensor/sensor_table.c | 6 + 19 files changed, 802 insertions(+), 269 deletions(-) diff --git a/src/occ_405/amec/amec_controller.c b/src/occ_405/amec/amec_controller.c index 1f02edab..a74ad2ba 100644 --- a/src/occ_405/amec/amec_controller.c +++ b/src/occ_405/amec/amec_controller.c @@ -36,6 +36,8 @@ //************************************************************************* extern dimm_sensor_flags_t G_dimm_temp_expired_bitmap; extern uint16_t G_cent_temp_expired_bitmap; +extern uint8_t G_ocm_dts_type_expired_bitmap; + //************************************************************************* // Macros //************************************************************************* @@ -246,8 +248,8 @@ void amec_controller_vrm_vdd_thermal() // Description: This function implements the 
Proportional Controller for the // DIMM thermal control. Although it doesn't return any // results, it populates the thermal vote in the field -// g_amec->thermaldimm.speed_request. -// +// g_amec->thermaldimm.speed_request, g_amec->thermalmcdimm.speed_request, +// g_amec->thermalpmic.speed_request and g_amec->thermalmcext.speed_request, // Task Flags: // // End Function Specification @@ -256,82 +258,197 @@ void amec_controller_dimm_thermal() /*------------------------------------------------------------------------*/ /* Local Variables */ /*------------------------------------------------------------------------*/ + uint8_t i = 0; + uint8_t l_max_dimm_types = 0; + const uint16_t l_dimm_types[4] = {DATA_FRU_DIMM, + DATA_FRU_MEMCTRL_DRAM, + DATA_FRU_PMIC, + DATA_FRU_MEMCTRL_EXT}; uint16_t l_thermal_winner = 0; uint16_t l_residue = 0; uint16_t l_old_residue = 0; + uint16_t l_throttle_temp = 0; + uint16_t l_Pgain = 0; + uint16_t * l_speed_request = NULL; + uint16_t * l_total_res = NULL; int16_t l_error = 0; int16_t l_mem_speed = 0; int16_t l_throttle_chg = 0; int32_t l_throttle = 0; sensor_t * l_sensor = NULL; + bool l_timeout = false; /*------------------------------------------------------------------------*/ /* Code */ /*------------------------------------------------------------------------*/ - // Get TEMPDIMMTHRM sensor value - l_sensor = getSensorByGsid(TEMPDIMMTHRM); - - if(G_dimm_temp_expired_bitmap.dw[0] || - G_dimm_temp_expired_bitmap.dw[1]) - { - //we were not able to read one or more dimm temperatures. - //Assume temperature is at the setpoint plus 1 degree C. 
- l_thermal_winner = g_amec->thermaldimm.setpoint + 10; - } - else - { - // Use the highest temperature of all DIMMs in 0.1 degrees C - l_thermal_winner = l_sensor->sample * 10; - } - - // Check if there is an error - if (g_amec->thermaldimm.setpoint == l_thermal_winner) - return; - - // Calculate the thermal control error - l_error = g_amec->thermaldimm.setpoint - l_thermal_winner; - - // Proportional Controller for the thermal control loop based on DIMM - // temperatures - l_throttle = (int32_t) l_error * g_amec->thermaldimm.Pgain; - l_residue = (uint16_t) l_throttle; - l_throttle_chg = (int16_t) (l_throttle >> 16); - - if ((int16_t) l_throttle_chg > AMEC_MEMORY_SPEED_CHANGE_LIMIT) + // loop for the number of different fru types the "dimm" sensors can be + // to determine memory throttle based on each type + if(MEM_TYPE_OCM == G_sysConfigData.mem_type) { - l_throttle_chg = AMEC_MEMORY_SPEED_CHANGE_LIMIT; + // all 4 types are possible: + l_max_dimm_types = 4; } else { - if ((int16_t) l_throttle_chg < (-AMEC_MEMORY_SPEED_CHANGE_LIMIT)) - { - l_throttle_chg = -AMEC_MEMORY_SPEED_CHANGE_LIMIT; - } + // can only be the one DATA_FRU_DIMM type which must be listed first in l_dimm_types + l_max_dimm_types = 1; } - // Calculate the new thermal speed request for DIMMs - l_mem_speed = g_amec->thermaldimm.speed_request + - (int16_t) l_throttle_chg * AMEC_MEMORY_STEP_SIZE; - - // Proceed with residue summation to correctly follow set-point - l_old_residue = g_amec->thermaldimm.total_res; - g_amec->thermaldimm.total_res += l_residue; - if (g_amec->thermaldimm.total_res < l_old_residue) + for(i= 0; i < l_max_dimm_types; i++) { - l_mem_speed += AMEC_MEMORY_STEP_SIZE; - } - - // Enforce actuator saturation limits - if (l_mem_speed > AMEC_MEMORY_MAX_STEP) - l_mem_speed = AMEC_MEMORY_MAX_STEP; - if (l_mem_speed < AMEC_MEMORY_MIN_STEP) - l_mem_speed = AMEC_MEMORY_MIN_STEP; - - // Generate the new thermal speed request - g_amec->thermaldimm.speed_request = (uint16_t) l_mem_speed; - - 
// Update the Memory OT Throttle Sensor - if(g_amec->thermaldimm.speed_request < AMEC_MEMORY_MAX_STEP) + l_timeout = false; // default this type did not timeout + + // setup vars specific for type being processed + if(l_dimm_types[i] == DATA_FRU_DIMM) + { + // use control values for DATA_FRU_DIMM type + l_throttle_temp = g_amec->thermaldimm.setpoint; + l_Pgain = g_amec->thermaldimm.Pgain; + l_speed_request = &g_amec->thermaldimm.speed_request; + l_total_res = &g_amec->thermaldimm.total_res; + + // Get the highest DIMM temperature in 0.1 degrees C + l_sensor = getSensorByGsid(TEMPDIMMTHRM); + l_thermal_winner = l_sensor->sample * 10; + + // check for time out + if(G_dimm_temp_expired_bitmap.dw[0] || G_dimm_temp_expired_bitmap.dw[1]) + { + if(MEM_TYPE_OCM != G_sysConfigData.mem_type) + { + // non-OCM can only have DIMM type so timeout must be for DIMM + l_timeout = true; + } + else if(G_ocm_dts_type_expired_bitmap & OCM_DTS_TYPE_DIMM_MASK) // MEM_TYPE_OCM + { + l_timeout = true; + } + } + } // end if DATA_FRU_DIMM + else if(l_dimm_types[i] == DATA_FRU_MEMCTRL_DRAM) + { + // use control values for DATA_FRU_MEMCTRL_DRAM type + l_throttle_temp = g_amec->thermalmcdimm.setpoint; + l_Pgain = g_amec->thermalmcdimm.Pgain; + l_speed_request = &g_amec->thermalmcdimm.speed_request; + l_total_res = &g_amec->thermalmcdimm.total_res; + + // Get the highest Memctrl+DRAM temperature in 0.1 degrees C + l_sensor = getSensorByGsid(TEMPMCDIMMTHRM); + l_thermal_winner = l_sensor->sample * 10; + + // check if this type timed out + if(G_ocm_dts_type_expired_bitmap & OCM_DTS_TYPE_MEMCTRL_DRAM_MASK) + { + l_timeout = true; + } + } + else if(l_dimm_types[i] == DATA_FRU_PMIC) + { + // use control values for DATA_FRU_PMIC type + l_throttle_temp = g_amec->thermalpmic.setpoint; + l_Pgain = g_amec->thermalpmic.Pgain; + l_speed_request = &g_amec->thermalpmic.speed_request; + l_total_res = &g_amec->thermalpmic.total_res; + + // Get the highest PMIC temperature in 0.1 degrees C + l_sensor = 
getSensorByGsid(TEMPPMICTHRM); + l_thermal_winner = l_sensor->sample * 10; + + // check if this type timed out + if(G_ocm_dts_type_expired_bitmap & OCM_DTS_TYPE_PMIC_MASK) + { + l_timeout = true; + } + } + else if(l_dimm_types[i] == DATA_FRU_MEMCTRL_EXT) + { + // use control values for DATA_FRU_MEMCTRL_EXT type + l_throttle_temp = g_amec->thermalmcext.setpoint; + l_Pgain = g_amec->thermalmcext.Pgain; + l_speed_request = &g_amec->thermalmcext.speed_request; + l_total_res = &g_amec->thermalmcext.total_res; + + // Get the highest external mem controller temperature in 0.1 degrees C + l_sensor = getSensorByGsid(TEMPMCEXTTHRM); + l_thermal_winner = l_sensor->sample * 10; + + // check if this type timed out + if(G_ocm_dts_type_expired_bitmap & OCM_DTS_TYPE_MEMCTRL_EXT_MASK) + { + l_timeout = true; + } + } + else + { + // should never happen -- code bug + TRAC_ERR("amec_controller_dimm_thermal: Invalid DIMM sensor type[0x%02X] at idx[%d]", + l_dimm_types[i], + i); + continue; + } + + // start common code for all types to determine throttle level + // Adjust the temperature if there was a time out reading this sensor fru type + if(l_timeout) + { + //Assume temperature is at the throttle temp plus 1 degree C. 
+ l_thermal_winner = l_throttle_temp + 10; + } + + // Check if this type is being used and the temp differs from the throttle point + if( (!l_thermal_winner) || (l_throttle_temp == l_thermal_winner) ) + continue; + + // Calculate the thermal control error + l_error = l_throttle_temp - l_thermal_winner; + + // Proportional Controller for the thermal control loop based on memory temperatures + l_throttle = (int32_t) l_error * l_Pgain; + l_residue = (uint16_t) l_throttle; + l_throttle_chg = (int16_t) (l_throttle >> 16); + + if ((int16_t) l_throttle_chg > AMEC_MEMORY_SPEED_CHANGE_LIMIT) + { + l_throttle_chg = AMEC_MEMORY_SPEED_CHANGE_LIMIT; + } + else + { + if ((int16_t) l_throttle_chg < (-AMEC_MEMORY_SPEED_CHANGE_LIMIT)) + { + l_throttle_chg = -AMEC_MEMORY_SPEED_CHANGE_LIMIT; + } + } + + // Calculate the new thermal speed request + l_mem_speed = *l_speed_request + + (int16_t) l_throttle_chg * AMEC_MEMORY_STEP_SIZE; + + // Proceed with residue summation to correctly follow set-point + l_old_residue = *l_total_res; + *l_total_res += l_residue; + if (*l_total_res < l_old_residue) + { + l_mem_speed += AMEC_MEMORY_STEP_SIZE; + } + + // Enforce actuator saturation limits + if (l_mem_speed > AMEC_MEMORY_MAX_STEP) + l_mem_speed = AMEC_MEMORY_MAX_STEP; + if (l_mem_speed < AMEC_MEMORY_MIN_STEP) + l_mem_speed = AMEC_MEMORY_MIN_STEP; + + // Save the new thermal speed request for this memory sensor type + *l_speed_request = (uint16_t) l_mem_speed; + + } // end for loop processing each memory sensor type + + // Done processing all types now determine if any of them are driving throttling + // and update the Memory OT Throttle Sensor + if( (g_amec->thermaldimm.speed_request < AMEC_MEMORY_MAX_STEP) || + (g_amec->thermalmcdimm.speed_request < AMEC_MEMORY_MAX_STEP) || + (g_amec->thermalpmic.speed_request < AMEC_MEMORY_MAX_STEP) || + (g_amec->thermalmcext.speed_request < AMEC_MEMORY_MAX_STEP) ) { // Memory speed is less than max indicate throttle due to OT 
sensor_update(AMECSENSOR_PTR(MEMOTTHROT), 1); @@ -340,7 +457,6 @@ void amec_controller_dimm_thermal() { sensor_update(AMECSENSOR_PTR(MEMOTTHROT), 0); } - } diff --git a/src/occ_405/amec/amec_controller.h b/src/occ_405/amec/amec_controller.h index 97e379c9..8808a035 100644 --- a/src/occ_405/amec/amec_controller.h +++ b/src/occ_405/amec/amec_controller.h @@ -117,8 +117,8 @@ void amec_controller_membuf_thermal(); * This function implements a Proportional Controller for the * thermal control loop based on DIMM temperatures. Although it * doesn't return any results, it populates the thermal vote in - * the field g_amec->thermaldimm.speed_request. - * + * the fields g_amec->thermaldimm.speed_request, g_amec->thermalmcdimm.speed_request, + * g_amec->thermalpmic.speed_request and g_amec->thermalmcext.speed_request */ void amec_controller_dimm_thermal(); diff --git a/src/occ_405/amec/amec_data.c b/src/occ_405/amec/amec_data.c index 43ff5b39..6aa6b78f 100755 --- a/src/occ_405/amec/amec_data.c +++ b/src/occ_405/amec/amec_data.c @@ -5,7 +5,7 @@ /* */ /* OpenPOWER OnChipController Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2018 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. 
*/ /* */ /* */ @@ -268,6 +268,96 @@ errlHndl_t AMEC_data_write_thrm_thresholds(const OCC_MODE i_mode) TRAC_INFO("AMEC_data_write_thrm_thresholds: DIMM setpoints - DVFS: %u, Error: %u", l_dvfs_temp, l_error); + // Store the Memctrl+DIMM thermal data + if (!l_pm_limits) + { + // use normal thresholds for Nominal or OPAL + l_dvfs_temp = l_frudata[DATA_FRU_MEMCTRL_DRAM].dvfs; + l_error = l_frudata[DATA_FRU_MEMCTRL_DRAM].error; + } + else + { + l_dvfs_temp = l_frudata[DATA_FRU_MEMCTRL_DRAM].pm_dvfs; + if(i_mode == OCC_MODE_TURBO) + { + //Need to log an error if we throttle in static turbo mode (for mfg) + l_error = l_dvfs_temp; + } + else + { + l_error = l_frudata[DATA_FRU_MEMCTRL_DRAM].pm_error; + } + } + // Store the DVFS thermal setpoint in 0.1 degrees C + g_amec->thermalmcdimm.setpoint = l_dvfs_temp * 10; + // Store the error temperature for OT detection + g_amec->thermalmcdimm.ot_error = l_error; + // Store the temperature timeout value + g_amec->thermalmcdimm.temp_timeout = l_frudata[DATA_FRU_DIMM].max_read_timeout; + + TRAC_INFO("AMEC_data_write_thrm_thresholds: MC+DIMM setpoints - DVFS: %u, Error: %u", + l_dvfs_temp, l_error); + + // Store the PMIC thermal data + if (!l_pm_limits) + { + // use normal thresholds for Nominal or OPAL + l_dvfs_temp = l_frudata[DATA_FRU_PMIC].dvfs; + l_error = l_frudata[DATA_FRU_PMIC].error; + } + else + { + l_dvfs_temp = l_frudata[DATA_FRU_PMIC].pm_dvfs; + if(i_mode == OCC_MODE_TURBO) + { + //Need to log an error if we throttle in static turbo mode (for mfg) + l_error = l_dvfs_temp; + } + else + { + l_error = l_frudata[DATA_FRU_PMIC].pm_error; + } + } + // Store the DVFS thermal setpoint in 0.1 degrees C + g_amec->thermalpmic.setpoint = l_dvfs_temp * 10; + // Store the error temperature for OT detection + g_amec->thermalpmic.ot_error = l_error; + // Store the temperature timeout value + g_amec->thermalpmic.temp_timeout = l_frudata[DATA_FRU_PMIC].max_read_timeout; + + TRAC_INFO("AMEC_data_write_thrm_thresholds: PMIC setpoints - DVFS: 
%u, Error: %u", + l_dvfs_temp, l_error); + + // Store the external mem ctrl thermal data + if (!l_pm_limits) + { + // use normal thresholds for Nominal or OPAL + l_dvfs_temp = l_frudata[DATA_FRU_MEMCTRL_EXT].dvfs; + l_error = l_frudata[DATA_FRU_MEMCTRL_EXT].error; + } + else + { + l_dvfs_temp = l_frudata[DATA_FRU_MEMCTRL_EXT].pm_dvfs; + if(i_mode == OCC_MODE_TURBO) + { + //Need to log an error if we throttle in static turbo mode (for mfg) + l_error = l_dvfs_temp; + } + else + { + l_error = l_frudata[DATA_FRU_MEMCTRL_EXT].pm_error; + } + } + // Store the DVFS thermal setpoint in 0.1 degrees C + g_amec->thermalmcext.setpoint = l_dvfs_temp * 10; + // Store the error temperature for OT detection + g_amec->thermalmcext.ot_error = l_error; + // Store the temperature timeout value + g_amec->thermalmcext.temp_timeout = l_frudata[DATA_FRU_MEMCTRL_EXT].max_read_timeout; + + TRAC_INFO("AMEC_data_write_thrm_thresholds: External MC setpoints - DVFS: %u, Error: %u", + l_dvfs_temp, l_error); + // Store the VRM Vdd thermal data if (!l_pm_limits) { diff --git a/src/occ_405/amec/amec_freq.c b/src/occ_405/amec/amec_freq.c index 4275b684..ea33a3ef 100755 --- a/src/occ_405/amec/amec_freq.c +++ b/src/occ_405/amec/amec_freq.c @@ -760,6 +760,30 @@ void amec_slv_mem_voting_box(void) kvm_reason = MEMORY_OVER_TEMP; } + // Check vote from Mem ctrl+DIMM thermal control loop + if (l_vote > g_amec->thermalmcdimm.speed_request) + { + l_vote = g_amec->thermalmcdimm.speed_request; + l_reason = AMEC_MEM_VOTING_REASON_MCDIMM; + kvm_reason = MEMORY_OVER_TEMP; + } + + // Check vote from Pmic thermal control loop + if (l_vote > g_amec->thermalpmic.speed_request) + { + l_vote = g_amec->thermalpmic.speed_request; + l_reason = AMEC_MEM_VOTING_REASON_PMIC; + kvm_reason = MEMORY_OVER_TEMP; + } + + // Check vote from external mem controller thermal control loop + if (l_vote > g_amec->thermalmcext.speed_request) + { + l_vote = g_amec->thermalmcext.speed_request; + l_reason = AMEC_MEM_VOTING_REASON_MC_EXT; + 
kvm_reason = MEMORY_OVER_TEMP; + } + // Check if memory autoslewing is enabled if (g_amec->mnfg_parms.mem_autoslew) { diff --git a/src/occ_405/amec/amec_freq.h b/src/occ_405/amec/amec_freq.h index 7fe8a269..8df060a3 100644 --- a/src/occ_405/amec/amec_freq.h +++ b/src/occ_405/amec/amec_freq.h @@ -5,7 +5,7 @@ /* */ /* OpenPOWER OnChipController Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2011,2015 */ +/* Contributors Listed Below - COPYRIGHT 2011,2019 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -100,6 +100,9 @@ typedef enum AMEC_MEM_VOTING_REASON_CENT = 0x01, AMEC_MEM_VOTING_REASON_DIMM = 0x02, AMEC_MEM_VOTING_REASON_SLEW = 0x03, + AMEC_MEM_VOTING_REASON_MCDIMM = 0x04, + AMEC_MEM_VOTING_REASON_PMIC = 0x05, + AMEC_MEM_VOTING_REASON_MC_EXT = 0x06, }amec_mem_voting_reason_t; // This is memory throttle reason code encoded in OPAL dynamic data diff --git a/src/occ_405/amec/amec_health.c b/src/occ_405/amec/amec_health.c index 0d99f20b..cdb7d6bb 100755 --- a/src/occ_405/amec/amec_health.c +++ b/src/occ_405/amec/amec_health.c @@ -56,6 +56,9 @@ dimm_sensor_flags_t G_dimm_timeout_logged_bitmap = {{0}}; // Are any dimms currently in the timedout state (bitmap of dimm)? dimm_sensor_flags_t G_dimm_temp_expired_bitmap = {{0}}; +// Timedout state of OCMB "DIMM" sensors by fru type (bitmap of DTS type) +uint8_t G_ocm_dts_type_expired_bitmap = 0; + // Have we already called out the centaur for timeout (bitmap of centaurs)? 
uint16_t G_cent_timeout_logged_bitmap = 0; @@ -177,10 +180,6 @@ void amec_health_check_dimm_temp() return; } - l_ot_error = g_amec->thermaldimm.ot_error; - l_sensor = getSensorByGsid(TEMPDIMMTHRM); - l_max_temp = l_sensor->sample_max; - //iterate over all dimms for(l_port = 0; l_port < l_max_port; l_port++) { @@ -211,14 +210,51 @@ void amec_health_check_dimm_temp() fru_temp_t* l_fru; l_fru = &g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm]; + switch(l_fru->temp_fru_type) + { + case DATA_FRU_DIMM: + l_ot_error = g_amec->thermaldimm.ot_error; + l_sensor = getSensorByGsid(TEMPDIMMTHRM); + l_max_temp = l_sensor->sample_max; + break; + + case DATA_FRU_MEMCTRL_DRAM: + l_ot_error = g_amec->thermalmcdimm.ot_error; + l_sensor = getSensorByGsid(TEMPMCDIMMTHRM); + l_max_temp = l_sensor->sample_max; + break; + + case DATA_FRU_PMIC: + l_ot_error = g_amec->thermalpmic.ot_error; + l_sensor = getSensorByGsid(TEMPPMICTHRM); + l_max_temp = l_sensor->sample_max; + break; + + case DATA_FRU_MEMCTRL_EXT: + l_ot_error = g_amec->thermalmcext.ot_error; + l_sensor = getSensorByGsid(TEMPMCEXTTHRM); + l_max_temp = l_sensor->sample_max; + break; + + default: + // this is a code bug trace and let the error be logged for debug + TRAC_ERR("amec_health_check_dimm_temp: sensor[%04X] marked as OT has invalid type[%d]", + (l_port<<8)|l_dimm, l_fru->temp_fru_type); + l_ot_error = 0xff; + l_max_temp = 0xff; + break; + } + TRAC_ERR("amec_health_check_dimm_temp: sensor[%04X] type[0x%02X] reached error temp[%d] current[%d]", + (l_port<<8)|l_dimm, l_fru->temp_fru_type, l_ot_error, l_fru->cur_temp); + amec_mem_mark_logged(l_port, l_dimm, &G_cent_overtemp_logged_bitmap, &G_dimm_overtemp_logged_bitmap.bytes[l_port]); - TRAC_ERR("amec_health_check_dimm_temp: DIMM%04X being called out for overtemp - %dC", - (l_port<<8)|l_dimm, l_fru->cur_temp); - // Create single elog with up to MAX_CALLOUTS for this port + // Create single elog with up to MAX_CALLOUTS + // this will be generic regardless of 
temperature sensor type, the callouts will be correct + // and the traces will point to specific types/thresholds if(l_callouts_count < ERRL_MAX_CALLOUTS) { //If we don't have an error log for the callout, create one @@ -290,12 +326,15 @@ void amec_health_check_dimm_timeout() { static dimm_sensor_flags_t L_temp_update_bitmap_prev = {{0}}; dimm_sensor_flags_t l_need_inc, l_need_clr, l_temp_update_bitmap; - uint8_t l_dimm, l_port; + uint8_t l_dimm, l_port, l_temp_timeout; fru_temp_t* l_fru; errlHndl_t l_err = NULL; uint32_t l_callouts_count = 0; uint64_t l_huid; static bool L_ran_once = FALSE; + uint8_t l_max_port = 0; // #ports in nimbus/#mem buffs in cumulus/OCM + uint8_t l_max_dimm_per_port = 0; // per port in nimbus/per mem buff in cumulus/OCM + uint8_t l_ocm_dts_type_expired_bitmap = 0; do { @@ -331,8 +370,6 @@ void amec_health_check_dimm_timeout() break; } - uint8_t l_max_port; // #ports in nimbus/#mem buffs in cumulus/OCM - uint8_t l_max_dimm_per_port; // per port in nimbus/per mem buff in cumulus/OCM if(G_sysConfigData.mem_type == MEM_TYPE_NIMBUS) { l_max_port = NUM_DIMM_PORTS; @@ -400,17 +437,42 @@ void amec_health_check_dimm_timeout() } //check if the temperature reading is still useable - if(g_amec->thermaldimm.temp_timeout == 0xff || - l_fru->sample_age < g_amec->thermaldimm.temp_timeout) + if(l_fru->temp_fru_type == DATA_FRU_DIMM) + { + l_temp_timeout = g_amec->thermaldimm.temp_timeout; + } + + else if(l_fru->temp_fru_type == DATA_FRU_MEMCTRL_DRAM) + { + l_temp_timeout = g_amec->thermalmcdimm.temp_timeout; + } + + else if(l_fru->temp_fru_type == DATA_FRU_PMIC) + { + l_temp_timeout = g_amec->thermalpmic.temp_timeout; + } + + else if(l_fru->temp_fru_type == DATA_FRU_MEMCTRL_EXT) + { + l_temp_timeout = g_amec->thermalmcext.temp_timeout; + } + + else // invalid type or not used, ignore + l_temp_timeout = 0xff; + + if(l_temp_timeout == 0xff || + l_fru->sample_age < l_temp_timeout) { continue; } - //temperature has expired. 
Notify control algorithms which DIMM + //temperature has expired. Notify control algorithms which DIMM DTS and type if(!(G_dimm_temp_expired_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm))) { G_dimm_temp_expired_bitmap.bytes[l_port] |= (DIMM_SENSOR0 >> l_dimm); - TRAC_ERR("Timed out reading DIMM%04X temperature sensor", (l_port<<8)|l_dimm); + TRAC_ERR("Timed out reading DIMM%04X temperature sensor type[0x%02X]", + (l_port<<8)|l_dimm, + l_fru->temp_fru_type); } //If we've already logged an error for this FRU go to the next one. @@ -421,7 +483,7 @@ void amec_health_check_dimm_timeout() // To prevent DIMMs from incorrectly being called out, don't log errors if there have // been timeouts with GPE1 tasks not finishing - if(G_error_history[ERRH_GPE1_NOT_IDLE] > g_amec->thermaldimm.temp_timeout) + if(G_error_history[ERRH_GPE1_NOT_IDLE] > l_temp_timeout) { TRAC_ERR("Timed out reading DIMM temperature due to GPE1 issues"); // give notification that GPE1 error should now be logged which will reset the OCC @@ -460,7 +522,7 @@ void amec_health_check_dimm_timeout() ERRL_SEV_PREDICTIVE, //Severity NULL, //Trace Buf DEFAULT_TRACE_SIZE, //Trace Size - g_amec->thermaldimm.temp_timeout, //userdata1 + l_temp_timeout, //userdata1 0); //userdata2 } @@ -527,6 +589,38 @@ void amec_health_check_dimm_timeout() }//iterate over all dimms }//iterate over all centaurs/ports }while(0); + + // For OCM the "DIMM" dts are used for different types. Need to determine what type the + // "DIMM" DTS readings are for so the control loop will handle timeout based on correct type + if(MEM_TYPE_OCM == G_sysConfigData.mem_type) + { + if(G_dimm_temp_expired_bitmap.dw[0] || G_dimm_temp_expired_bitmap.dw[1]) + { + // at least one sensor expired. 
Set type for each expired sensor + //iterate across all OCMBs + for(l_port = 0; l_port < l_max_port; l_port++) + { + //iterate over all "dimm" DTS readings + for(l_dimm = 0; l_dimm < l_max_dimm_per_port; l_dimm++) + { + if(G_dimm_temp_expired_bitmap.bytes[l_port] & (DIMM_SENSOR0 >> l_dimm)) + { + // found an expired sensor + l_ocm_dts_type_expired_bitmap |= g_amec->proc[0].memctl[l_port].centaur.dimm_temps[l_dimm].dts_type_mask; + } + }//iterate over all dimms + }//iterate over all OCMBs + } // if temp expired + + // check if there is a change to any type expired + if(G_ocm_dts_type_expired_bitmap != l_ocm_dts_type_expired_bitmap) + { + TRAC_INFO("DIMM DTS type expired bitmap changed from[0x%04X] to[0x%04X]", + G_ocm_dts_type_expired_bitmap, l_ocm_dts_type_expired_bitmap); + G_ocm_dts_type_expired_bitmap = l_ocm_dts_type_expired_bitmap; + } + } // if mem type OCM + L_ran_once = TRUE; } // end amec_health_check_dimm_timeout() diff --git a/src/occ_405/amec/amec_init.c b/src/occ_405/amec/amec_init.c index 3d3faa6b..6b51b1d5 100644 --- a/src/occ_405/amec/amec_init.c +++ b/src/occ_405/amec/amec_init.c @@ -267,6 +267,21 @@ void amec_init_gamec_struct(void) g_amec->thermalcent.Pgain = 30000; g_amec->thermalcent.speed_request = AMEC_MEMORY_MAX_STEP; + // Initialize thermal controller based on temperature sensor covering both mem ctrl and DIMM + g_amec->thermalmcdimm.setpoint = 850; //In 0.1 degrees C -> 850 = 85.0 C + g_amec->thermalmcdimm.Pgain = 30000; + g_amec->thermalmcdimm.speed_request = AMEC_MEMORY_MAX_STEP; + + // Initialize thermal controller based on PMIC temperatures + g_amec->thermalpmic.setpoint = 850; //In 0.1 degrees C -> 850 = 85.0 C + g_amec->thermalpmic.Pgain = 30000; + g_amec->thermalpmic.speed_request = AMEC_MEMORY_MAX_STEP; + + // Initialize thermal controller based on external mem controller temperatures + g_amec->thermalmcext.setpoint = 850; //In 0.1 degrees C -> 850 = 85.0 C + g_amec->thermalmcext.Pgain = 30000; + g_amec->thermalmcext.speed_request 
= AMEC_MEMORY_MAX_STEP; + // Initialize component power caps g_amec->pcap.active_proc_pcap = 0; g_amec->pcap.active_mem_level = 0; diff --git a/src/occ_405/amec/amec_sensors_ocmb.c b/src/occ_405/amec/amec_sensors_ocmb.c index a2cd4a6b..8ffbefae 100644 --- a/src/occ_405/amec/amec_sensors_ocmb.c +++ b/src/occ_405/amec/amec_sensors_ocmb.c @@ -110,21 +110,21 @@ void amec_update_ocmb_sensors(uint8_t i_membuf) // End Function Specification void amec_update_ocmb_dimm_dts_sensors(OcmbMemData * i_sensor_cache, uint8_t i_membuf) { +// confirmed ok to use same values for all types (internal mc, dimm, external mc, pmic...) #define MIN_VALID_DIMM_TEMP 1 #define MAX_VALID_DIMM_TEMP 125 //according to Mike Pardiek 04/23/2019 -#define MAX_MEM_TEMP_CHANGE 2 +#define MAX_MEM_TEMP_CHANGE 4 - uint32_t k, l_hottest_dimm_temp; + uint32_t k; uint16_t l_dts[NUM_DIMMS_PER_OCMB] = {0}; - uint32_t l_hottest_dimm_loc = NUM_DIMMS_PER_OCMB; int32_t l_dimm_temp, l_prev_temp; static uint8_t L_ran_once[MAX_NUM_OCMBS] = {FALSE}; - static bool L_ot_traced[MAX_NUM_OCMBS][NUM_DIMMS_PER_OCMB] = {{false}}; - // Harvest thermal data for all dimms + // Harvest thermal data for memory thermal sensors that are enabled and being used for(k=0; k < NUM_DIMMS_PER_OCMB; k++) { - if(!CENTAUR_SENSOR_ENABLED(i_membuf, k)) + if( (!CENTAUR_SENSOR_ENABLED(i_membuf, k)) || + (g_amec->proc[0].memctl[i_membuf].centaur.dimm_temps[k].temp_fru_type == DATA_FRU_NOT_USED) ) { continue; } @@ -250,48 +250,17 @@ void amec_update_ocmb_dimm_dts_sensors(OcmbMemData * i_sensor_cache, uint8_t i_m } } - //Check if at or above the error temperature - if(l_dts[k] >= g_amec->thermaldimm.ot_error) - { - //Set a bit so that this dimm can be called out by the thermal thread - G_dimm_overtemp_bitmap.bytes[i_membuf] |= (DIMM_SENSOR0 >> k); - // trace first time OT per DIMM - if( !L_ot_traced[i_membuf][k] ) - { - TRAC_ERR("amec_update_ocmb_dimm_dts_sensors: Mem Buf[%d] DIMM[%d] reached error temp[%d]. 
current[%d]", - i_membuf, - k, - g_amec->thermaldimm.ot_error, - l_dts[k]); - L_ot_traced[i_membuf][k] = true; - } - } + //Check for over temperature must be done by type and will be checked + // in amec_update_ocmb_temp_sensors() which happens after all OCMBs have been read } - // Find hottest temperature from all DIMMs for this centaur - for(l_hottest_dimm_temp = 0, k = 0; k < NUM_DIMMS_PER_OCMB; k++) + // update the current temperatures + for(k = 0; k < NUM_DIMMS_PER_OCMB; k++) { - if(l_dts[k] > l_hottest_dimm_temp) - { - l_hottest_dimm_temp = l_dts[k]; - l_hottest_dimm_loc = k; - } g_amec->proc[0].memctl[i_membuf].centaur.dimm_temps[k].cur_temp = l_dts[k]; } - amec_centaur_t* l_centaur_ptr = &g_amec->proc[0].memctl[i_membuf].centaur; - - //only update location if hottest dimm temp is greater than previous maximum - if(l_hottest_dimm_temp > l_centaur_ptr->tempdimmax.sample_max) - { - sensor_update(&l_centaur_ptr->locdimmax, l_hottest_dimm_loc); - } - - //update the max dimm temperature sensor for this centaur - sensor_update(&l_centaur_ptr->tempdimmax, l_hottest_dimm_temp); - L_ran_once[i_membuf] = TRUE; - AMEC_DBG("Centaur[%d]: HotDimm=%d\n",i_membuf,l_hottest_dimm_temp); } // Function Specification @@ -317,129 +286,223 @@ void amec_update_ocmb_dts_sensors(OcmbMemData * i_sensor_cache, uint8_t i_membuf fru_temp_t* l_fru = &g_amec->proc[0].memctl[i_membuf].centaur.centaur_hottest; - l_prev_temp = l_fru->cur_temp; - if(!l_prev_temp) + // Internal DTS sensor is either for internal memctrl or not being used + // ignore the internal sensor if it isn't marked for internal memctrl + if(l_fru->temp_fru_type == DATA_FRU_CENTAUR) { - l_prev_temp = l_sens_temp; - } - - //Check DTS status bits - if(i_sensor_cache->status.fields.ubdts0_valid && - (!i_sensor_cache->status.fields.ubdts0_err)) - { - //make sure temperature is within a 'reasonable' range. 
- if(l_sens_temp < MIN_VALID_MEMBUF_TEMP || - l_sens_temp > MAX_VALID_MEMBUF_TEMP) - { - //set a flag so that if we end up logging an error we have something to debug why - l_fru->flags |= FRU_TEMP_OUT_OF_RANGE; - l_dts = l_prev_temp; - } - else - { - //don't allow temp to change more than is reasonable since last read - if(l_sens_temp > (l_prev_temp + MAX_MEM_TEMP_CHANGE)) - { - l_dts = l_prev_temp + MAX_MEM_TEMP_CHANGE; - if(!l_fru->flags) - { - TRAC_INFO("membuf temp rose faster than reasonable: membuf[%d] prev[%d] cur[%d]", - i_membuf, l_prev_temp, l_sens_temp); - l_fru->flags |= FRU_TEMP_FAST_CHANGE; - } - } - else if (l_sens_temp < (l_prev_temp - MAX_MEM_TEMP_CHANGE)) - { - l_dts = l_prev_temp - MAX_MEM_TEMP_CHANGE; - if(!l_fru->flags) - { - TRAC_INFO("membuf temp fell faster than reasonable: cent[%d] prev[%d] cur[%d]", - i_membuf, l_prev_temp, l_sens_temp); - l_fru->flags |= FRU_TEMP_FAST_CHANGE; - } - } - else //reasonable amount of change occurred - { - l_dts = l_sens_temp; - l_fru->flags &= ~FRU_TEMP_FAST_CHANGE; - } - - //Notify thermal thread that temperature has been updated - G_cent_temp_updated_bitmap |= (CENTAUR0_PRESENT_MASK >> i_membuf); - - //clear error flags - l_fru->flags &= FRU_TEMP_FAST_CHANGE; - } + l_prev_temp = l_fru->cur_temp; + if(!l_prev_temp) + { + l_prev_temp = l_sens_temp; + } + + //Check DTS status bits + if(i_sensor_cache->status.fields.ubdts0_valid && + (!i_sensor_cache->status.fields.ubdts0_err)) + { + //make sure temperature is within a 'reasonable' range. 
+ if(l_sens_temp < MIN_VALID_MEMBUF_TEMP || + l_sens_temp > MAX_VALID_MEMBUF_TEMP) + { + //set a flag so that if we end up logging an error we have something to debug why + l_fru->flags |= FRU_TEMP_OUT_OF_RANGE; + l_dts = l_prev_temp; + } + else + { + //don't allow temp to change more than is reasonable since last read + if(l_sens_temp > (l_prev_temp + MAX_MEM_TEMP_CHANGE)) + { + l_dts = l_prev_temp + MAX_MEM_TEMP_CHANGE; + if(!l_fru->flags) + { + TRAC_INFO("membuf temp rose faster than reasonable: membuf[%d] prev[%d] cur[%d]", + i_membuf, l_prev_temp, l_sens_temp); + l_fru->flags |= FRU_TEMP_FAST_CHANGE; + } + } + else if (l_sens_temp < (l_prev_temp - MAX_MEM_TEMP_CHANGE)) + { + l_dts = l_prev_temp - MAX_MEM_TEMP_CHANGE; + if(!l_fru->flags) + { + TRAC_INFO("membuf temp fell faster than reasonable: cent[%d] prev[%d] cur[%d]", + i_membuf, l_prev_temp, l_sens_temp); + l_fru->flags |= FRU_TEMP_FAST_CHANGE; + } + } + else //reasonable amount of change occurred + { + l_dts = l_sens_temp; + l_fru->flags &= ~FRU_TEMP_FAST_CHANGE; + } + + //Notify thermal thread that temperature has been updated + G_cent_temp_updated_bitmap |= (CENTAUR0_PRESENT_MASK >> i_membuf); + + //clear error flags + l_fru->flags &= FRU_TEMP_FAST_CHANGE; + } + } + else //status was INVALID + { + if(L_ran_once[i_membuf]) + { + //Trace the error if we haven't traced it already for this sensor + if(!(l_fru->flags & FRU_SENSOR_STATUS_INVALID) && + i_sensor_cache->status.fields.ubdts0_err) + { + TRAC_ERR("Membuf %d temp sensor error.", i_membuf); + } + + l_fru->flags |= FRU_SENSOR_STATUS_INVALID; + } + + //use last temperature + l_dts = l_prev_temp; + } + + L_ran_once[i_membuf] = TRUE; + + //Check if at or above the error temperature + if(l_dts >= g_amec->thermalcent.ot_error) + { + //Set a bit so that this dimm can be called out by the thermal thread + G_cent_overtemp_bitmap |= (CENTAUR0_PRESENT_MASK >> i_membuf); + } + + // Update Interim Data - later this will get picked up to form centaur sensor + 
l_fru->cur_temp = l_dts; + + AMEC_DBG("Membuf[%d]: HotMembuf=%d\n",i_membuf,l_dts); } - else //status was INVALID + else // internal sensor not being used { - if(L_ran_once[i_membuf]) - { - //Trace the error if we haven't traced it already for this sensor - if(!(l_fru->flags & FRU_SENSOR_STATUS_INVALID) && - i_sensor_cache->status.fields.ubdts0_err) - { - TRAC_ERR("Membuf %d temp sensor error.", i_membuf); - } + // make sure temperature is 0 indicating not present + l_fru->cur_temp = 0; - l_fru->flags |= FRU_SENSOR_STATUS_INVALID; - } + //Notify thermal thread that temperature has been updated so no timeout error is logged + G_cent_temp_updated_bitmap |= CENTAUR0_PRESENT_MASK >> i_membuf; - //use last temperature - l_dts = l_prev_temp; + //clear error flags + l_fru->flags = 0; } - - L_ran_once[i_membuf] = TRUE; - - //Check if at or above the error temperature - if(l_dts >= g_amec->thermalcent.ot_error) - { - //Set a bit so that this dimm can be called out by the thermal thread - G_cent_overtemp_bitmap |= (CENTAUR0_PRESENT_MASK >> i_membuf); - } - - // Update Interim Data - later this will get picked up to form centaur sensor - g_amec->proc[0].memctl[i_membuf].centaur.centaur_hottest.cur_temp = l_dts; - - AMEC_DBG("Membuf[%d]: HotMembuf=%d\n",i_membuf,l_dts); } // Function Specification // // Name: amec_update_ocmb_temp_sensors // -// Description: Updates thermal sensors that have data grabbed by the centaur. 
+// Description: Updates thermal sensors to give summary (across all OCMBs) for each mem type // // Thread: RealTime Loop // // End Function Specification void amec_update_ocmb_temp_sensors(void) { - uint32_t k; + uint32_t k, l_dimm; uint32_t l_hot_dimm = 0; uint32_t l_hot_mb = 0; + uint32_t l_hot_mb_dimm = 0; + uint32_t l_hot_pmic = 0; + uint32_t l_hot_ext_mb = 0; + uint8_t l_ot_error = 0; + uint8_t l_cur_temp = 0; + uint8_t l_fru_type = DATA_FRU_NOT_USED; + static bool L_ot_traced[MAX_NUM_OCMBS][NUM_DIMMS_PER_OCMB] = {{false}}; - // ----------------------------------------------------------- - // Find hottest temperature from all membufs for this Proc chip - // Find hottest temperature from all DIMMs for this Proc chip - // ----------------------------------------------------------- for(k=0; k < MAX_NUM_OCMBS; k++) { - if(g_amec->proc[0].memctl[k].centaur.centaur_hottest.cur_temp > l_hot_mb) + // Find hottest temperature from all internal membufs for this Proc chip + // make sure the type is "CENTAUR" i.e. 
internal memory controller temp + if( (g_amec->proc[0].memctl[k].centaur.centaur_hottest.temp_fru_type == DATA_FRU_CENTAUR) && + (g_amec->proc[0].memctl[k].centaur.centaur_hottest.cur_temp > l_hot_mb) ) { l_hot_mb = g_amec->proc[0].memctl[k].centaur.centaur_hottest.cur_temp; } - if(g_amec->proc[0].memctl[k].centaur.tempdimmax.sample > l_hot_dimm) + + // process each of the thermal sensors (stored as "dimm" temps) + // based on what type they are for and finding the hottest for each type + for(l_dimm=0; l_dimm < NUM_DIMMS_PER_OCMB; l_dimm++) { - l_hot_dimm = g_amec->proc[0].memctl[k].centaur.tempdimmax.sample; - } - } + l_fru_type = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].temp_fru_type; + l_cur_temp = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp; + + switch(l_fru_type) + { + case DATA_FRU_DIMM: + l_ot_error = g_amec->thermaldimm.ot_error; + if(g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp > l_hot_dimm) + { + l_hot_dimm = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp; + } + break; + + case DATA_FRU_MEMCTRL_DRAM: + l_ot_error = g_amec->thermalmcdimm.ot_error; + if(g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp > l_hot_mb_dimm) + { + l_hot_mb_dimm = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp; + } + break; + + case DATA_FRU_PMIC: + l_ot_error = g_amec->thermalpmic.ot_error; + if(g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp > l_hot_pmic) + { + l_hot_pmic = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp; + } + break; + + case DATA_FRU_MEMCTRL_EXT: + l_ot_error = g_amec->thermalmcext.ot_error; + if(g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp > l_hot_ext_mb) + { + l_hot_ext_mb = g_amec->proc[0].memctl[k].centaur.dimm_temps[l_dimm].cur_temp; + } + break; + + case DATA_FRU_NOT_USED: + default: + // ignore reading + l_ot_error = 0; + break; + } // end switch fru type + + // check if this "DIMM" sensor is over its error temperature + 
if( l_ot_error && (l_cur_temp >= l_ot_error) ) + { + //Set a bit so that this sensor can be called out by the thermal thread + G_dimm_overtemp_bitmap.bytes[k] |= (DIMM_SENSOR0 >> l_dimm); + // trace first time OT per DIMM DTS sensor + if( !L_ot_traced[k][l_dimm] ) + { + TRAC_ERR("amec_update_ocmb_temp_sensors: OCMB[%d] DTS[%d] type[0x%02X] reached error temp[%d]. current[%d]", + k, + l_dimm, + l_fru_type, + l_ot_error, + l_cur_temp); + L_ot_traced[k][l_dimm] = true; + } + } + } // end for each "dimm" thermal sensor + } // end for each OCMB + sensor_update(&g_amec->proc[0].tempcent,l_hot_mb); AMEC_DBG("HotMembuf=%d\n",l_hot_mb); sensor_update(&g_amec->proc[0].tempdimmthrm,l_hot_dimm); AMEC_DBG("HotDimm=%d\n",l_hot_dimm); + sensor_update(&g_amec->proc[0].tempmcdimmthrm,l_hot_mb_dimm); + AMEC_DBG("HotMCDimm=%d\n",l_hot_mb_dimm); + + sensor_update(&g_amec->proc[0].temppmicthrm,l_hot_pmic); + AMEC_DBG("HotPmic=%d\n",l_hot_pmic); + + sensor_update(&g_amec->proc[0].tempmcextthrm,l_hot_ext_mb); + AMEC_DBG("HotExternalMembuf=%d\n",l_hot_ext_mb); } @@ -510,24 +573,27 @@ void amec_perfcount_ocmb_getmc( OcmbMemData * i_sensor_cache, g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memread2ms = tempreg; - // Go after second MC performance counter (power ups and activations) - tempu = l_sensor_cache->mba_act; - templ = l_sensor_cache->mba_powerups; - - // ------------------------------------------------------------ - // Sensor: MRDMx (0.01 Mrps) Memory read requests per sec - // ------------------------------------------------------------ - tempreg = g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memread2ms; - tempreg += g_amec->proc[0].memctl[i_membuf].centaur.portpair[1].perf.memread2ms; - sensor_update( (&(g_amec->proc[0].memctl[i_membuf].mrd)), tempreg); - - // ------------------------------------------------------------- - // Sensor: MWRMx (0.01 Mrps) Memory write requests per sec - // ------------------------------------------------------------- - 
tempreg = g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memwrite2ms; - tempreg += g_amec->proc[0].memctl[i_membuf].centaur.portpair[1].perf.memwrite2ms; - sensor_update( (&(g_amec->proc[0].memctl[i_membuf].mwr)), tempreg); - + // Due to limited SRAM only have sensor support for first 12 mem buffs + if(i_membuf < 12) + { + // Go after second MC performance counter (power ups and activations) + tempu = l_sensor_cache->mba_act; + templ = l_sensor_cache->mba_powerups; + + // ------------------------------------------------------------ + // Sensor: MRDMx (0.01 Mrps) Memory read requests per sec + // ------------------------------------------------------------ + tempreg = g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memread2ms; + tempreg += g_amec->proc[0].memctl[i_membuf].centaur.portpair[1].perf.memread2ms; + sensor_update( (&(g_amec->proc[0].memctl[i_membuf].mrd)), tempreg); + + // ------------------------------------------------------------- + // Sensor: MWRMx (0.01 Mrps) Memory write requests per sec + // ------------------------------------------------------------- + tempreg = g_amec->proc[0].memctl[i_membuf].centaur.portpair[0].perf.memwrite2ms; + tempreg += g_amec->proc[0].memctl[i_membuf].centaur.portpair[1].perf.memwrite2ms; + sensor_update( (&(g_amec->proc[0].memctl[i_membuf].mwr)), tempreg); + } return; } diff --git a/src/occ_405/amec/amec_sys.h b/src/occ_405/amec/amec_sys.h index c19cd531..f17b42b3 100755 --- a/src/occ_405/amec/amec_sys.h +++ b/src/occ_405/amec/amec_sys.h @@ -146,6 +146,7 @@ typedef struct amec_cent_mem_speed_t last_mem_speed_sent; } amec_portpair_t; +// bit masks for fru_temp_t flags #define FRU_SENSOR_STATUS_STALLED 0x01 #define FRU_SENSOR_STATUS_ERROR 0x02 #define FRU_SENSOR_STATUS_VALID_OLD 0x04 @@ -154,6 +155,12 @@ typedef struct #define FRU_TEMP_FAST_CHANGE 0x20 #define FRU_SENSOR_CENT_NEST_FIR6 0x40 //centaur only +// OpenCAPI memory only bit masks for fru_temp_t dts_type_mask +#define OCM_DTS_TYPE_DIMM_MASK 
0x01 +#define OCM_DTS_TYPE_MEMCTRL_DRAM_MASK 0x02 +#define OCM_DTS_TYPE_PMIC_MASK 0x04 +#define OCM_DTS_TYPE_MEMCTRL_EXT_MASK 0x08 + typedef struct { uint8_t cur_temp; @@ -161,6 +168,10 @@ typedef struct uint8_t flags; // Sensor ID for reporting temperature to BMC and FSP uint32_t temp_sid; + // Indicates what eConfigDataFruType this temperature is for + uint8_t temp_fru_type; + // Indicates what this temperature is for + uint8_t dts_type_mask; }fru_temp_t; typedef struct @@ -191,7 +202,6 @@ typedef struct // Sensor ID for reporting temperature to BMC and FSP uint32_t temp_sid; - } amec_centaur_t; typedef struct @@ -469,8 +479,11 @@ typedef struct vectorSensor_t util_vector; // Memory Summary Sensors - sensor_t tempcent; - sensor_t tempdimmthrm; + sensor_t tempcent; // hottest of all DATA_FRU_CENTAUR monitored by this OCC + sensor_t tempdimmthrm; // hottest of all DATA_FRU_DIMM monitored by this OCC + sensor_t tempmcdimmthrm; // hottest of all DATA_FRU_MEMCTRL_DRAM monitored by this OCC + sensor_t temppmicthrm; // hottest of all DATA_FRU_PMIC monitored by this OCC + sensor_t tempmcextthrm; // hottest of all DATA_FRU_MEMCTRL_EXT monitored by this OCC sensor_t mempwrthrot; sensor_t memotthrot; @@ -628,10 +641,16 @@ typedef struct //--------------------------------------------------------- // Thermal Controller based on processor temperatures amec_controller_t thermalproc; - // Thermal Controller based on Centaur temperatures + // Thermal Controller based on Centaur (internal mc) temperatures amec_controller_t thermalcent; // Thermal Controller based on DIMM temperatures amec_controller_t thermaldimm; + // Thermal Controller based on temperature sensors covering both Memctrl+DIMM + amec_controller_t thermalmcdimm; + // Thermal Controller based on PMIC temperatures + amec_controller_t thermalpmic; + // Thermal Controller based on external mem controller temperatures + amec_controller_t thermalmcext; // Thermal Controller based on VRM Vdd temperatures amec_controller_t 
thermalvdd; diff --git a/src/occ_405/cent/centaur_data.c b/src/occ_405/cent/centaur_data.c index 0e6f17d7..3f6f2555 100755 --- a/src/occ_405/cent/centaur_data.c +++ b/src/occ_405/cent/centaur_data.c @@ -104,8 +104,8 @@ extern gpe_shared_data_t G_shared_gpe_data; // MemDataMcs mcs; // not used // MemDataSensorCache scache; // } MemData; -//Global array of centaur data buffers -GPE_BUFFER(CentaurMemData G_centaur_data[NUM_CENTAUR_DATA_BUFF + +//Global array of centaur data buffers common with OCMB +GPE_BUFFER(CentaurMemData G_centaur_data[MAX_NUM_MEM_CONTROLLERS + NUM_CENTAUR_DOUBLE_BUF + NUM_CENTAUR_DATA_EMPTY_BUF]); @@ -118,18 +118,19 @@ GPE_BUFFER(MemBufScomParms_t G_cent_scom_gpe_parms); //scom command list entry GPE_BUFFER(scomList_t G_cent_scom_list_entry[NUM_CENT_OPS]); -//buffer for storing output from running IPC_ST_MEMBUF_SCOM() +//buffer for storing output from running IPC_ST_MEMBUF_SCOM() Centaur only GPE_BUFFER(uint64_t G_cent_scom_data[MAX_NUM_CENTAURS]) = {0}; // parms for call to IPC_ST_MEMBUF_INIT_FUNCID GPE_BUFFER(MemBufConfigParms_t G_gpe_centaur_config_args); GPE_BUFFER(MemBufConfiguration_t G_membufConfiguration); -//Global array of centaur data pointers -CentaurMemData * G_centaur_data_ptrs[MAX_NUM_CENTAURS] = { &G_centaur_data[0], - &G_centaur_data[1], &G_centaur_data[2], &G_centaur_data[3], - &G_centaur_data[4], &G_centaur_data[5], &G_centaur_data[6], - &G_centaur_data[7]}; +//Global array of centaur data pointers common with OCMB need to use max mem ctrl to cover max OCMB +CentaurMemData * G_centaur_data_ptrs[MAX_NUM_MEM_CONTROLLERS] = + { &G_centaur_data[0], &G_centaur_data[1], &G_centaur_data[2], &G_centaur_data[3], + &G_centaur_data[4], &G_centaur_data[5], &G_centaur_data[6], &G_centaur_data[7], + &G_centaur_data[8], &G_centaur_data[9], &G_centaur_data[10], &G_centaur_data[11], + &G_centaur_data[12], &G_centaur_data[13], &G_centaur_data[14], &G_centaur_data[15] }; //Global structures for gpe get mem data parms 
GPE_BUFFER(MemBufGetMemDataParms_t G_membuf_data_parms); @@ -146,6 +147,15 @@ membuf_data_task_t G_membuf_data_task = { .membuf_data_ptr = &G_centaur_data[MAX_NUM_CENTAURS] }; +//OCMB structures used for task data pointers. +membuf_data_task_t G_ocmb_data_task = { + .start_membuf = 0, + .current_membuf = 0, + .end_membuf = 15, + .prev_membuf = 15, + .membuf_data_ptr = &G_centaur_data[MAX_NUM_OCMBS] +}; + dimm_sensor_flags_t G_dimm_enabled_sensors = {{0}}; dimm_sensor_flags_t G_dimm_present_sensors = {{0}}; @@ -600,10 +610,18 @@ void centaur_data( void ) membuf_data_task_t * l_centaur_data_ptr = &G_membuf_data_task; MemBufGetMemDataParms_t * l_parms = (MemBufGetMemDataParms_t *)(l_centaur_data_ptr->gpe_req.cmd_data); + uint8_t l_empty_buf_idx = MAX_NUM_CENTAURS + 1; // array index for empty buffer static bool L_gpe_scheduled = FALSE; static bool L_gpe_error_logged = FALSE; static bool L_gpe_had_1_tick = FALSE; + // local inits are for Centaur, need to change some that are different for OCM + if(G_sysConfigData.mem_type == MEM_TYPE_OCM) + { + l_centaur_data_ptr = &G_ocmb_data_task; + l_empty_buf_idx = MAX_NUM_OCMBS + 1; + } + do { // ------------------------------------------ @@ -806,7 +824,7 @@ void centaur_data( void ) // (this is very handy for debug...) if( !CENTAUR_PRESENT(l_centaur_data_ptr->current_membuf)) { - G_centaur_data_ptrs[l_centaur_data_ptr->current_membuf] = &G_centaur_data[9]; + G_centaur_data_ptrs[l_centaur_data_ptr->current_membuf] = &G_centaur_data[l_empty_buf_idx]; } //Update current centaur @@ -1266,7 +1284,7 @@ void centaur_init( void ) // // Description: Returns a pointer to the most up-to-date centaur data for // the centaur associated with the specified OCC centaur id. -// Returns NULL for centaur ID outside the range of 0 to 7. +// Returns NULL for mem buf ID outside range. 
// // End Function Specification CentaurMemData * cent_get_centaur_data_ptr( const uint8_t i_occ_centaur_id ) @@ -1274,14 +1292,21 @@ CentaurMemData * cent_get_centaur_data_ptr( const uint8_t i_occ_centaur_id ) //The caller needs to send in a valid OCC centaur id. Since type is uchar //so there is no need to check for case less than 0. //If centaur id is invalid then returns NULL. - if( i_occ_centaur_id < MAX_NUM_CENTAURS ) + if( (G_sysConfigData.mem_type == MEM_TYPE_CUMULUS) && + (i_occ_centaur_id < MAX_NUM_CENTAURS) ) + { + //Returns a pointer to the most up-to-date centaur data. + return G_centaur_data_ptrs[i_occ_centaur_id]; + } + else if( (G_sysConfigData.mem_type == MEM_TYPE_OCM) && + (i_occ_centaur_id < MAX_NUM_OCMBS) ) { //Returns a pointer to the most up-to-date centaur data. return G_centaur_data_ptrs[i_occ_centaur_id]; } else { - //Core id outside the range + //Mem buf id outside the range TRAC_ERR("cent_get_centaur_data_ptr: Invalid OCC centaur id [0x%x]", i_occ_centaur_id); return( NULL ); } diff --git a/src/occ_405/cent/centaur_data.h b/src/occ_405/cent/centaur_data.h index b5177b16..d6c2cea6 100755 --- a/src/occ_405/cent/centaur_data.h +++ b/src/occ_405/cent/centaur_data.h @@ -121,6 +121,7 @@ typedef struct membuf_data_task membuf_data_task_t; //Global centaur structures used for task data pointers extern membuf_data_task_t G_membuf_data_task; +extern membuf_data_task_t G_ocmb_data_task; //Global is bitmask of centaurs extern uint32_t G_present_centaurs; diff --git a/src/occ_405/cent/ocmb_data.c b/src/occ_405/cent/ocmb_data.c index fc3f8fa0..e78e66d8 100644 --- a/src/occ_405/cent/ocmb_data.c +++ b/src/occ_405/cent/ocmb_data.c @@ -42,7 +42,7 @@ extern gpe_shared_data_t G_shared_gpe_data; * @post G_membufConfiguration populated * @post G_present_centaurs populated * @post G_dimm_present_sensors - * @post G_membuf_data_task populated + * @post G_ocmb_data_task populated * @post GPE request to call for recover created ? 
* @post GPE request to call for throttle conttrol created * @note HW Deadman timer enabled and set to max value @@ -109,7 +109,7 @@ void ocmb_init(void) G_membuf_data_parms.data = 0; rc = gpe_request_create( - &G_membuf_data_task.gpe_req, //gpe_req for the task + &G_ocmb_data_task.gpe_req, //gpe_req for the task &G_async_gpe_queue1, //queue IPC_ST_MEMBUF_DATA_FUNCID, //Function ID &G_membuf_data_parms, //parm for the task @@ -120,7 +120,7 @@ void ocmb_init(void) if( rc ) { TRAC_ERR("ocmb_init: gpe_request_create failed for " - "G_membuf_data_task.gpe_req. rc = 0x%08x", rc); + "G_ocmb_data_task.gpe_req. rc = 0x%08x", rc); break; } diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds.c b/src/occ_405/cmdh/cmdh_fsp_cmds.c index 493d7c57..e8ee2d6e 100755 --- a/src/occ_405/cmdh/cmdh_fsp_cmds.c +++ b/src/occ_405/cmdh/cmdh_fsp_cmds.c @@ -205,8 +205,11 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr) } //If memory is being throttled due to OverTemp or due to Failure to read sensors set mthrot_due_to_ot bit. - if (((g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_DIMM) || - (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_CENT))) + if ( (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_DIMM) || + (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_CENT) || + (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_MCDIMM) || + (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_PMIC) || + (g_amec->mem_throttle_reason == AMEC_MEM_VOTING_REASON_MC_EXT) ) { l_poll_rsp->ext_status.mthrot_due_to_ot = 1; } @@ -384,7 +387,7 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr) //Add entry for centaurs. 
uint32_t l_temp_sid = g_amec->proc[0].memctl[l_cent].centaur.temp_sid; l_tempSensorList[l_sensorHeader.count].id = l_temp_sid; - l_tempSensorList[l_sensorHeader.count].fru_type = DATA_FRU_CENTAUR; + l_tempSensorList[l_sensorHeader.count].fru_type = g_amec->proc[0].memctl[l_cent].centaur.centaur_hottest.temp_fru_type; if (G_cent_timeout_logged_bitmap & (CENTAUR0_PRESENT_MASK >> l_cent)) { l_tempSensorList[l_sensorHeader.count].value = 0xFF; @@ -414,7 +417,7 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr) if (l_temp_sid != 0) { l_tempSensorList[l_sensorHeader.count].id = l_temp_sid; - l_tempSensorList[l_sensorHeader.count].fru_type = DATA_FRU_DIMM; + l_tempSensorList[l_sensorHeader.count].fru_type = g_amec->proc[0].memctl[l_cent].centaur.dimm_temps[l_dimm].temp_fru_type; //If a dimm timed out long enough, we should return 0xFFFF for that sensor. if (G_dimm_temp_expired_bitmap.bytes[l_cent] & (DIMM_SENSOR0 >> l_dimm)) { diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c index 0ca1e04f..e9763095 100755 --- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c +++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.c @@ -2104,13 +2104,6 @@ errlHndl_t data_store_thrm_thresholds(const cmdh_fsp_cmd_t * i_cmd_ptr, G_data_cnfg->thrm_thresh.data[l_frutype].max_read_timeout = l_cmd_ptr->data[i].max_read_timeout; - // VRM OT status is no longer supported since the OCC supports reading Vdd temperature - // Trace if VRM OT status FRU type is received and just ignore it - if(l_frutype == DATA_FRU_VRM_OT_STATUS) - { - CMDH_TRAC_IMP("data_store_thrm_thresholds: Received deprecated VRM OT STATUS type will be ignored"); - } - // Useful trace for debugging //CMDH_TRAC_INFO("data_store_thrm_thresholds: FRU_type[0x%.2X] T_control[%u] DVFS[%u] Error[%u]", // G_data_cnfg->thrm_thresh.data[l_frutype].fru_type, @@ -2352,12 +2345,15 @@ errlHndl_t data_store_mem_cfg(const cmdh_fsp_cmd_t * i_cmd_ptr, if (IS_OCM_MEM_TYPE(l_data_set->memory_type)) { - // Get the 
physical location from type - l_membuf_num &= OCMB_TYPE_LOCATION_MASK; if (G_sysConfigData.mem_type != MEM_TYPE_OCM) { l_type_mismatch = TRUE; } + else + { + // Get the physical location from type + l_membuf_num &= OCMB_TYPE_LOCATION_MASK; + } } else if (G_sysConfigData.mem_type != MEM_TYPE_CUMULUS) { @@ -2383,13 +2379,34 @@ errlHndl_t data_store_mem_cfg(const cmdh_fsp_cmd_t * i_cmd_ptr, // Store the temperature sensor ID g_amec->proc[0].memctl[l_membuf_num].centaur.temp_sid = l_data_set->temp_sensor_id; - if (G_sysConfigData.mem_type == MEM_TYPE_OCM) + // Specific handling for OCMB vs Centaur + if(G_sysConfigData.mem_type == MEM_TYPE_OCM) { - // Both OCMB and Centaur code use this global to idicate which MBs - // are present, but Centaur sets this up later in centaur_init() - G_present_centaurs |= (CENTAUR0_PRESENT_MASK >> l_membuf_num); + // Both OCMB and Centaur code use this global to indicate which MBs + // are present, but Centaur sets this up later in centaur_init() + G_present_centaurs |= (CENTAUR0_PRESENT_MASK >> l_membuf_num); + + // Store the temperature sensor fru type + // The internal sensor is either for internal memctrl ("centaur" fru type) + // or it is not being used due to hw bug + if( (l_data_set->dimm_info2 == DATA_FRU_CENTAUR) || + (l_data_set->dimm_info2 == DATA_FRU_NOT_USED) ) + { + g_amec->proc[0].memctl[l_membuf_num].centaur.centaur_hottest.temp_fru_type = l_data_set->dimm_info2; + } + else + { + // not a valid fru type for the internal sensor, trace and don't use it + CMDH_TRAC_ERR("data_store_mem_cfg: Got invalid fru type[0x%02X] for mem buf[%d]", + l_data_set->dimm_info2, l_membuf_num); + g_amec->proc[0].memctl[l_membuf_num].centaur.centaur_hottest.temp_fru_type = DATA_FRU_NOT_USED; + } + } + else // centaur + { + // must be type centaur + g_amec->proc[0].memctl[l_membuf_num].centaur.centaur_hottest.temp_fru_type = DATA_FRU_CENTAUR; } - l_num_mem_bufs++; } else // individual DIMM @@ -2401,6 +2418,48 @@ errlHndl_t data_store_mem_cfg(const
cmdh_fsp_cmd_t * i_cmd_ptr, g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_sid = l_data_set->temp_sensor_id; + // Store the temperature sensor fru type + if(G_sysConfigData.mem_type == MEM_TYPE_OCM) + { + // The 2 external temp sensors may be used for non-dimm fru type i.e. PMIC, mem controller... + // this fru type is coming from attributes setup by HWP during IPL and then read by (H)TMGT + if(l_data_set->dimm_info2 == DATA_FRU_DIMM) + { + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_DIMM; + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = OCM_DTS_TYPE_DIMM_MASK; + } + else if(l_data_set->dimm_info2 == DATA_FRU_MEMCTRL_DRAM) + { + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_MEMCTRL_DRAM; + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = OCM_DTS_TYPE_MEMCTRL_DRAM_MASK; + } + else if(l_data_set->dimm_info2 == DATA_FRU_PMIC) + { + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_PMIC; + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = OCM_DTS_TYPE_PMIC_MASK; + } + else if(l_data_set->dimm_info2 == DATA_FRU_MEMCTRL_EXT) + { + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_MEMCTRL_EXT; + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = OCM_DTS_TYPE_MEMCTRL_EXT_MASK; + } + else // sensor not used + { + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_NOT_USED; + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].dts_type_mask = 0; + if (l_data_set->dimm_info2 != DATA_FRU_NOT_USED) + { + // not a valid fru type + CMDH_TRAC_ERR("data_store_mem_cfg: Got invalid fru type[0x%02X] for mem buf[%d] dimm[%d]", + l_data_set->dimm_info2, l_membuf_num, l_dimm_num); + } + } + 
} + else // centaur + { + // must be type DIMM + g_amec->proc[0].memctl[l_membuf_num].centaur.dimm_temps[l_dimm_num].temp_fru_type = DATA_FRU_DIMM; + } l_num_dimms++; } } // end CENTAUR/OCMB diff --git a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h index e37e1148..ae76cc75 100755 --- a/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h +++ b/src/occ_405/cmdh/cmdh_fsp_cmds_datacnfg.h @@ -77,15 +77,21 @@ typedef enum typedef enum { DATA_FRU_PROC = 0x00, - DATA_FRU_CENTAUR = 0x01, + DATA_FRU_CENTAUR = 0x01, // memory controller (Centaur/Explorer) sensor location is internal DATA_FRU_DIMM = 0x02, - DATA_FRU_VRM_OT_STATUS = 0x03, // this is just for the bit and is no longer being supported + DATA_FRU_MEMCTRL_DRAM = 0x03, // OCM only. external sensor covers both mem controller and DRAM DATA_FRU_GPU = 0x04, DATA_FRU_GPU_MEM = 0x05, DATA_FRU_VRM_VDD = 0x06, // this is an actual temperature reading for VRM Vdd + DATA_FRU_PMIC = 0x07, // OCM only + DATA_FRU_MEMCTRL_EXT = 0x08, // OCM only. 
memory controller sensor location is external DATA_FRU_MAX, } eConfigDataFruType; +// For OCM the mapping of sensors to fru type comes in mem config data from (H)TMGT and some may not be used +// OCC should ignore readings from all sensors marked as not used +#define DATA_FRU_NOT_USED 0xFF + typedef enum { PWR_READING_TYPE_APSS = 0x00, diff --git a/src/occ_405/occbuildname.c b/src/occ_405/occbuildname.c index 54dcc231..74bd96ed 100755 --- a/src/occ_405/occbuildname.c +++ b/src/occ_405/occbuildname.c @@ -34,6 +34,6 @@ volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = #else -volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = /**/ "op_occ_190723b\0" /**/ ; +volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = /**/ "op_occ_190726a\0" /**/ ; #endif diff --git a/src/occ_405/sensor/sensor_enum.h b/src/occ_405/sensor/sensor_enum.h index e56fea2e..0133fec4 100755 --- a/src/occ_405/sensor/sensor_enum.h +++ b/src/occ_405/sensor/sensor_enum.h @@ -511,6 +511,9 @@ enum e_gsid // ------------------------------------------------------ TEMPCENT, TEMPDIMMTHRM, + TEMPMCDIMMTHRM, + TEMPPMICTHRM, + TEMPMCEXTTHRM, // ------------------------------------------------------ // GPU Sensors diff --git a/src/occ_405/sensor/sensor_info.c b/src/occ_405/sensor/sensor_info.c index e57e4e4d..a9bc7c80 100755 --- a/src/occ_405/sensor/sensor_info.c +++ b/src/occ_405/sensor/sensor_info.c @@ -336,6 +336,9 @@ const sensor_info_t G_sensor_info[] = /* ==MemSummarySensors== NameString Units Type Location Number Freq ScaleFactor */ SENSOR_INFO_T_ENTRY( TEMPCENT, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_8TH_TICK_HZ, AMEFP( 1, 0) ), SENSOR_INFO_T_ENTRY( TEMPDIMMTHRM, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_128TH_TICK_HZ, AMEFP( 1, 0) ), + SENSOR_INFO_T_ENTRY( TEMPMCDIMMTHRM, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM,
AMEC_SENSOR_NONUM, AMEEFP_EVERY_128TH_TICK_HZ, AMEFP( 1, 0) ), + SENSOR_INFO_T_ENTRY( TEMPPMICTHRM, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_128TH_TICK_HZ, AMEFP( 1, 0) ), + SENSOR_INFO_T_ENTRY( TEMPMCEXTTHRM, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_MEM, AMEC_SENSOR_NONUM, AMEEFP_EVERY_128TH_TICK_HZ, AMEFP( 1, 0) ), /* ==GPUSensors== NameString Units Type Location Number Freq ScaleFactor */ SENSOR_INFO_T_ENTRY( TEMPGPU0, "C\0", AMEC_SENSOR_TYPE_TEMP, AMEC_SENSOR_LOC_GPU, AMEC_SENSOR_NONUM, AMEEFP_1S_IN_HZ, AMEFP( 1, 0) ), diff --git a/src/occ_405/sensor/sensor_table.c b/src/occ_405/sensor/sensor_table.c index fe66b163..6237c5fd 100755 --- a/src/occ_405/sensor/sensor_table.c +++ b/src/occ_405/sensor/sensor_table.c @@ -385,6 +385,9 @@ const sensor_ptr_t G_amec_sensor_list[] = SENSOR_PTR(TEMPCENT, &g_amec_sys.proc[0].tempcent), SENSOR_PTR(TEMPDIMMTHRM, &g_amec_sys.proc[0].tempdimmthrm), + SENSOR_PTR(TEMPMCDIMMTHRM, &g_amec_sys.proc[0].tempmcdimmthrm), + SENSOR_PTR(TEMPPMICTHRM, &g_amec_sys.proc[0].temppmicthrm), + SENSOR_PTR(TEMPMCEXTTHRM, &g_amec_sys.proc[0].tempmcextthrm), // ------------------------------------------------------ // GPU Sensors @@ -558,6 +561,9 @@ const minisensor_ptr_t G_amec_mini_sensor_list[] INIT_SECTION = MINI_SENSOR_PTR( TEMPCENT, &G_dcom_slv_outbox_tx.tempcent), MINI_SENSOR_PTR( TEMPDIMMTHRM, &G_dcom_slv_outbox_tx.tempdimmthrm), + MINI_SENSOR_PTR( TEMPMCDIMMTHRM, NULL), // $todo RTC: 213569 add 3 new summary sensors to call home + MINI_SENSOR_PTR( TEMPPMICTHRM, NULL), + MINI_SENSOR_PTR( TEMPMCEXTTHRM, NULL), // ------------------------------------------------------ // GPU Sensors