Skip to content

Commit

Permalink
GPU 405 Enable Memory Temperatures
Browse files Browse the repository at this point in the history
Change-Id: Id50d12a50a05b8b3a6a6f1ce3ce4512d3299caa7
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/46882
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Reviewed-by: Christopher J. Cain <cjcain@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
  • Loading branch information
wilbryan committed Oct 3, 2017
1 parent 76b91d0 commit 74f721c
Show file tree
Hide file tree
Showing 11 changed files with 1,303 additions and 312 deletions.
6 changes: 4 additions & 2 deletions src/common/gpe_err.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
#define GPE_RC_GET_NEST_DTS_FAILED 0x61 // Failed to collect nest DTS temperatures

// GPU Errors
#define GPE_RC_NO_GPU_SUPPORT 0x8F // GPE1 image doesn't support GPUs

#define GPE_RC_NO_GPU_SUPPORT 0x80 // GPE1 image doesn't support GPUs
#define GPE_RC_GPU_DRIVER_CHANGE 0x81 // GPU in transition or just completed phase change
#define GPE_RC_GPU_CMD_NOT_SUPPORTED 0x82 // GPU rejected command with no support
#define GPE_RC_GPU_CMD_FAILED 0x83 // An error occurred in the last GPU operation
#endif //_GPE_ERR_H
31 changes: 23 additions & 8 deletions src/common/gpu_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@
#define GPU_RESET_REQ_SLV 2
#define GPU_RESET_REQ_SLV_COMPLETE 3

// Identifiers for the GPU capability kinds known to this header.
// NOTE(review): going by the enumerator names these presumably select
// "memory" versus "core" capability; the users of this enum are not visible
// here, so confirm against the callers before relying on that reading.
// The numeric values are spelled out explicitly and must stay 0 and 1.
typedef enum
{
    GPU_CAP_MEM  = 0,   // 0x00
    GPU_CAP_CORE = 1    // 0x01
} GPU_CAPABILITIES;

typedef enum
{
ID_GPU0 = 0x00,
Expand All @@ -64,12 +70,21 @@ typedef enum
GPU_REQ_READ_MEM_TEMP_3 = 0x06, // mem temp step 3
GPU_REQ_READ_MEM_TEMP_FINISH = 0x07, // Get memory temp reading
GPU_REQ_READ_CAPS_START = 0x08, // Start reading capabilities
GPU_REQ_READ_CAPS_2 = 0x09, // Start reading capabilities
GPU_REQ_READ_CAPS_3 = 0x0A, // Start reading capabilities
GPU_REQ_READ_CAPS_FINISH = 0x0B,
GPU_REQ_READ_PWR_LIMIT_START = 0x0C, // Start reading GPU information
GPU_REQ_READ_PWR_LIMIT_STOP = 0x0D, // Read GPU temp register
GPU_REQ_READ_PWR_LIMIT = 0x0E, // Start reading pwr limit
GPU_REQ_READ_CAPS_2 = 0x09, // Capabilities read step 2
GPU_REQ_READ_CAPS_3 = 0x0A, // Capabilities read step 3
GPU_REQ_READ_CAPS_FINISH = 0x0B, // get capabilities
GPU_REQ_READ_PWR_LIMIT_START = 0x10, // Start reading GPU power limit
GPU_REQ_READ_PWR_LIMIT_2 = 0x11,
GPU_REQ_READ_PWR_LIMIT_3 = 0x12,
GPU_REQ_READ_PWR_LIMIT_FINISH = 0x13,
GPU_REQ_SET_PWR_LIMIT_START = 0x20, // Start setting GPU power limit
GPU_REQ_SET_PWR_LIMIT_2 = 0x21,
GPU_REQ_SET_PWR_LIMIT_3 = 0x22,
GPU_REQ_SET_PWR_LIMIT_FINISH = 0x23,
GPU_REQ_CHECK_DRIVER_START = 0x31, // Start check driver loaded
GPU_REQ_CHECK_DRIVER_2 = 0x32,
GPU_REQ_CHECK_DRIVER_3 = 0x33,
GPU_REQ_CHECK_DRIVER_FINISH = 0x34,
GPU_REQ_RESET = 0x60, // Reset
} gpu_op_req_e;

Expand All @@ -78,10 +93,10 @@ typedef struct
{
GpeErrorStruct error;
uint8_t gpu_id;
uint8_t gpu_rc;
uint8_t operation;
uint64_t data;
uint64_t data[3];
} gpu_sm_args_t;


#endif // _GPU_STRUCTS_H

20 changes: 16 additions & 4 deletions src/include/registers/ocb_firmware_registers.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER OnChipController Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2015,2016 */
/* Contributors Listed Below - COPYRIGHT 2015,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -1411,9 +1411,21 @@ typedef union ocb_occflg {
uint32_t i2c_engine2_lock_occ : 1;
uint32_t i2c_engine3_lock_host : 1;
uint32_t i2c_engine3_lock_occ : 1;
uint32_t reserved_occ : 10;
#else
uint32_t reserved_occ : 10;
uint32_t gpu0_reset_status : 1;
uint32_t gpu1_reset_status : 1;
uint32_t gpu2_reset_status : 1;
uint32_t reserved_occ : 3;
uint32_t wof_hcode_mode : 2;
uint32_t active_quad_update : 1;
uint32_t request_occ_safe : 1;
#else
uint32_t request_occ_safe : 1;
uint32_t active_quad_update : 1;
uint32_t wof_hcode_mode : 2;
uint32_t reserved_occ : 3;
uint32_t gpu2_reset_status : 1;
uint32_t gpu1_reset_status : 1;
uint32_t gpu0_reset_status : 1;
uint32_t i2c_engine3_lock_occ : 1;
uint32_t i2c_engine3_lock_host : 1;
uint32_t i2c_engine2_lock_occ : 1;
Expand Down
6 changes: 0 additions & 6 deletions src/occ_405/amec/amec_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -458,12 +458,6 @@ void amec_data_write_pcap(void)
g_amec->pcap.ovs_node_pcap = G_sysConfigData.pcap.hard_min_pcap;
}

//Oversubscription pcap can NOT be higher than a customer set pcap.
if(g_amec->pcap.ovs_node_pcap > l_customer)
{
g_amec->pcap.ovs_node_pcap = l_customer;
}

//for all new pcap data setting: If KVM, update the OPAL dynamic data
if(G_sysConfigData.system_type.kvm)
{
Expand Down
81 changes: 70 additions & 11 deletions src/occ_405/amec/amec_pcap.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,16 @@ extern uint32_t G_first_num_gpus_sys;
// Thread: Real Time Loop
//
// End Function Specification
void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power)
void amec_gpu_pcap(bool i_oversubscription, bool i_active_pcap_changed, int32_t i_avail_power)
{
/*------------------------------------------------------------------------*/
/* Local Variables */
/*------------------------------------------------------------------------*/
uint8_t i = 0;
uint32_t l_gpu_cap_mw = 0;
uint16_t l_system_gpu_total_pcap = 0; // total GPU pcap required by system based on if currently in oversub or not
static uint16_t L_total_gpu_pcap = 0; // Current total GPU pcap in effect
static uint16_t L_n_plus_1_mode_gpu_total_pcap = 0; // Total GPU pcap required for N+1 (not in oversubscription)
static uint16_t L_n_mode_gpu_total_pcap = 0; // Total GPU pcap required for oversubscription
static uint16_t L_active_psr_gpu_total_pcap = 0; // Total GPU pcap for the currently set pcap and PSR
static uint16_t L_per_gpu_pcap = 0; // Amount of L_total_gpu_pcap for each GPU
Expand All @@ -112,10 +114,12 @@ void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power)
/*------------------------------------------------------------------------*/
/* Code */
/*------------------------------------------------------------------------*/
// If this is the first time running calculate the total GPU power cap for oversubscription
// If this is the first time running calculate the total GPU power cap for system power caps (N and N+1)
if(L_first_run)
{
// calculate total GPU power cap for oversubscription
if(g_amec->pcap.ovs_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)

{
// Take all non-GPU power away from the oversubscription power cap
L_n_mode_gpu_total_pcap = g_amec->pcap.ovs_node_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts;
Expand Down Expand Up @@ -157,6 +161,50 @@ void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power)
ERRL_CALLOUT_PRIORITY_HIGH);
commitErrl(&l_err);
}

// calculate total GPU power cap for N+1 (not in oversubscription)
if(G_sysConfigData.pcap.system_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
{
// Take all non-GPU power away from the N+1 power cap
L_n_plus_1_mode_gpu_total_pcap = G_sysConfigData.pcap.system_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts;
// Add back in the power that will be dropped by processor DVFS and memory throttling and give to GPUs
L_n_plus_1_mode_gpu_total_pcap += G_sysConfigData.total_proc_mem_pwr_drop_watts;
}
else
{
// This should not happen, the total non GPU power should never be higher than the N+1 mode cap
// Log error and set GPUs to minimum power cap
L_n_plus_1_mode_gpu_total_pcap = 0; // this will set minimum GPU power cap

TRAC_ERR("amec_gpu_pcap: non GPU max power %dW is more than N+1 mode pwr limit %dW",
G_sysConfigData.total_non_gpu_max_pwr_watts, G_sysConfigData.pcap.system_pcap);

/* @
* @errortype
* @moduleid AMEC_GPU_PCAP_MID
* @reasoncode GPU_FAILURE
* @userdata1 N+1 mode Power Cap watts
* @userdata2 Total non-GPU power watts
* @userdata4 ERC_GPU_N_PLUS_1_MODE_PCAP_CALC_FAILURE
* @devdesc Total non-GPU power more than N+1 mode power cap
*
*/
errlHndl_t l_err = createErrl(AMEC_GPU_PCAP_MID,
GPU_FAILURE,
ERC_GPU_N_PLUS_1_MODE_PCAP_CALC_FAILURE,
ERRL_SEV_PREDICTIVE,
NULL,
DEFAULT_TRACE_SIZE,
G_sysConfigData.pcap.system_pcap,
G_sysConfigData.total_non_gpu_max_pwr_watts);

//Callout firmware
addCalloutToErrl(l_err,
ERRL_CALLOUT_TYPE_COMPONENT_ID,
ERRL_COMPONENT_ID_FIRMWARE,
ERRL_CALLOUT_PRIORITY_HIGH);
commitErrl(&l_err);
}
} // if first run

// Calculate the total GPU power cap for the current active limit and PSR
Expand All @@ -180,12 +228,23 @@ void amec_gpu_pcap(bool i_active_pcap_changed, int32_t i_avail_power)
G_sysConfigData.total_non_gpu_max_pwr_watts, g_amec->pcap.active_node_pcap);
}

// Total GPU power cap is the lower of oversubscription and active power limit
// must always account for oversubscription to ensure when a power supply is lost the OCC
// can react fast enough, GPU power capping is too slow and must have GPU power cap already
// set to account for oversubscription case
L_total_gpu_pcap = (L_n_mode_gpu_total_pcap < L_active_psr_gpu_total_pcap) ?
L_n_mode_gpu_total_pcap : L_active_psr_gpu_total_pcap;
// Total GPU power cap is the lower of the system power cap (N+1 mode, or N mode when the
// system is in oversubscription) and the active power limit. The oversubscription cap does
// not need to be accounted for at all times: the automatic hw power brake will assert to
// the GPUs if there is a problem between the time oversubscription is entered and the time
// the OCC can set, and the GPUs can react to, a new power limit
if(i_oversubscription)
{
// system in oversubscription use N mode cap
l_system_gpu_total_pcap = L_n_mode_gpu_total_pcap;
}
else
{
// system is not in oversubscription use N+1 mode cap
l_system_gpu_total_pcap = L_n_plus_1_mode_gpu_total_pcap;
}

L_total_gpu_pcap = (l_system_gpu_total_pcap < L_active_psr_gpu_total_pcap) ?
l_system_gpu_total_pcap : L_active_psr_gpu_total_pcap;

// Divide the total equally across all GPUs in the system
if(G_first_num_gpus_sys)
Expand Down Expand Up @@ -282,8 +341,8 @@ void amec_pcap_calc(void)
l_oversub_state = AMEC_INTF_GET_OVERSUBSCRIPTION();

// Determine the active power cap. norm_node_pcap is set as lowest
// between sys and user in amec_data_write_pcap()
// when in oversub only use oversub pcap if lower than norm_node_pcap
// between sys (N+1 mode) and user in amec_data_write_pcap().
// When in oversub (N mode), only use the oversub pcap if it is lower than norm_node_pcap
// to handle user set power cap lower than the oversub power cap
if( (TRUE == l_oversub_state) &&
(g_amec->pcap.ovs_node_pcap < g_amec->pcap.norm_node_pcap) )
Expand Down Expand Up @@ -312,7 +371,7 @@ void amec_pcap_calc(void)
// Determine GPU power cap if there are GPUs present
if(G_first_proc_gpu_config)
{
amec_gpu_pcap(l_active_pcap_changed, l_avail_power);
amec_gpu_pcap(l_oversub_state, l_active_pcap_changed, l_avail_power);
}

if(l_node_pwr != 0)
Expand Down
38 changes: 23 additions & 15 deletions src/occ_405/amec/amec_sys.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,27 +436,35 @@ typedef struct
//-------------------------------------------------------------

typedef struct {
bool disabled; // GPU has been marked failed and no longer monitored
bool readOnce; // Comm has been established with GPU
bool overtempError; // Core OT error has been logged against GPU
bool memOvertempError; // Memory OT error has been logged against GPU
bool disabled; // GPU has been marked failed and no longer monitored
bool readOnce; // Comm has been established with GPU
bool commErrorLogged; // GPU has been called out due to comm error
bool overtempError; // Core OT error has been logged against GPU
bool memOvertempError; // Memory OT error has been logged against GPU
bool checkDriverLoaded; // Indicates if need to check if driver is loaded
bool driverLoaded; // Indicates if GPU driver is loaded
bool driverLoaded; // Indicates if GPU driver is loaded
bool checkMemTempSupport; // Indicates if need to check if mem monitoring is supported
bool memTempSupported; // Indicates if memory temperature monitoring is supported
uint8_t memErrorCount; // count of consecutive GPU mem temp read failures
uint8_t errorCount; // count of consecutive GPU core temp read failures
bool memTempSupported; // Indicates if memory temperature monitoring is supported
bool notReset; // '1' = GPU NOT in reset. Read from OCC FLAGS register
bool coreTempNotAvailable; // for fan control: '1' = core temp not available. (send 0 for fan control)
bool memTempNotAvailable; // for fan control: '1' = Mem temp not available. (send 0 for fan control)
bool coreTempFailure; // for fan control: '1' = timeout failure reading core temp (send 0xFF for fan control)
bool memTempFailure; // for fan control: '1' = timeout failure reading Mem temp (send 0xFF for fan control)
uint8_t memErrorCount; // count of consecutive GPU mem temp read failures when GPU not in reset
uint8_t errorCount; // count of consecutive GPU core temp read failures when GPU not in reset
uint8_t retryCount; // count of consecutive GPU core temp read failures before I2C reset
} gpuStatus_t;

typedef struct {
bool check_pwr_limit; // Indicates if need to read power limits from GPU
bool pwr_limits_read; // Indicates if power limits were read i.e. have min/max
bool gpu_min_cap_required; // Indicates if power limits were read i.e. have min/max
uint32_t gpu_min_pcap_mw; // Min GPU power limit in mW read from the GPU
uint32_t gpu_max_pcap_mw; // Max GPU power limit in mW read from the GPU
uint32_t gpu_desired_pcap_mw; // AMEC determined pcap in mW to set
bool check_pwr_limit; // Indicates if need to read power limits from GPU
bool pwr_limits_read; // Indicates if power limits were read i.e. have min/max
bool set_failed; // Indicates if failed to set power limit
bool gpu_min_cap_required; // Indicates if GPU requires min cap
uint32_t gpu_min_pcap_mw; // Min GPU power limit in mW read from the GPU
uint32_t gpu_max_pcap_mw; // Max GPU power limit in mW read from the GPU
uint32_t gpu_desired_pcap_mw; // AMEC determined pcap in mW to set
uint32_t gpu_requested_pcap_mw; // Requested power cap in mW sent to GPU
uint32_t gpu_actual_pcap_mw; // Actual power cap in mW read back from the GPU
uint32_t gpu_default_pcap_mw; // Default power cap in mW read from the GPU
} gpuPcap_t;


Expand Down
46 changes: 34 additions & 12 deletions src/occ_405/cmdh/cmdh_fsp_cmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
const sensor_t *vrfan = getSensorByGsid(VRMPROCOT);
if (vrfan != NULL)
{
l_tempSensorList[l_sensorHeader.count].id = G_sysConfigData.proc_huid;
l_tempSensorList[l_sensorHeader.count].id = 0;
l_tempSensorList[l_sensorHeader.count].fru_type = DATA_FRU_VRM;
l_tempSensorList[l_sensorHeader.count].value = vrfan->sample & 0xFF;
l_sensorHeader.count++;
Expand All @@ -358,24 +358,46 @@ ERRL_RC cmdh_poll_v20(cmdh_fsp_rsp_t * o_rsp_ptr)
// Add GPU temperatures
for (k=0; k<MAX_NUM_GPU_PER_DOMAIN; k++)
{
if(GPU_PRESENT(k)) // temp until GPU sensor IDs are sent make sensor ids "GPU"<gpu#>
if(GPU_PRESENT(k))
{
// GPU core temperature
if(G_amec_sensor_list[TEMPGPU0 + k]->ipmi_sid) // temp
l_tempSensorList[l_sensorHeader.count].id = G_amec_sensor_list[TEMPGPU0 + k]->ipmi_sid;
else
l_tempSensorList[l_sensorHeader.count].id = 0xC6 + (9 * G_pbax_id.chip_id) + (k*3); // temp
l_tempSensorList[l_sensorHeader.count].id = G_amec_sensor_list[TEMPGPU0 + k]->ipmi_sid;
l_tempSensorList[l_sensorHeader.count].fru_type = DATA_FRU_GPU;
l_tempSensorList[l_sensorHeader.count].value = (G_amec_sensor_list[TEMPGPU0 + k]->sample) & 0xFF;
if(g_amec->gpu[k].status.coreTempFailure)
{
// failed to read core temperature return 0xFF
l_tempSensorList[l_sensorHeader.count].value = 0xFF;
}
else if(g_amec->gpu[k].status.coreTempNotAvailable)
{
// core temperature not available return 0
l_tempSensorList[l_sensorHeader.count].value = 0;
}
else
{
// have a good core temperature return the reading
l_tempSensorList[l_sensorHeader.count].value = (G_amec_sensor_list[TEMPGPU0 + k]->sample) & 0xFF;
}
l_sensorHeader.count++;

// GPU memory temperature
if(G_amec_sensor_list[TEMPGPU0 + k]->ipmi_sid) // temp
l_tempSensorList[l_sensorHeader.count].id = G_amec_sensor_list[TEMPGPU0MEM + k]->ipmi_sid;
else
l_tempSensorList[l_sensorHeader.count].id = 0xC7 + (9 * G_pbax_id.chip_id) + (k*3); // temp
l_tempSensorList[l_sensorHeader.count].id = G_amec_sensor_list[TEMPGPU0MEM + k]->ipmi_sid;
l_tempSensorList[l_sensorHeader.count].fru_type = DATA_FRU_GPU_MEM;
l_tempSensorList[l_sensorHeader.count].value = (G_amec_sensor_list[TEMPGPU0MEM + k]->sample) & 0xFF;
if(g_amec->gpu[k].status.memTempFailure)
{
// failed to read memory temperature return 0xFF
l_tempSensorList[l_sensorHeader.count].value = 0xFF;
}
else if(g_amec->gpu[k].status.memTempNotAvailable)
{
// memory temperature not available return 0
l_tempSensorList[l_sensorHeader.count].value = 0;
}
else
{
// have a good memory temperature return the reading
l_tempSensorList[l_sensorHeader.count].value = (G_amec_sensor_list[TEMPGPU0MEM + k]->sample) & 0xFF;
}
l_sensorHeader.count++;
}
}
Expand Down

0 comments on commit 74f721c

Please sign in to comment.