From 3f57751abd8ca0308e3938dc86d5a313b7599ebc Mon Sep 17 00:00:00 2001 From: William Bryan Date: Tue, 22 Aug 2017 11:50:01 -0500 Subject: [PATCH] 405 Side GPU Core Temp Collection Change-Id: Ia1b10f5208c49ba168dcf338f0cbeb2c4ab46971 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/44982 Tested-by: FSP CI Jenkins Reviewed-by: Martha Broyles Reviewed-by: Christopher J. Cain Reviewed-by: William A. Bryan --- src/common/gpu_structs.h | 27 +- src/occ_405/amec/amec_sensors_power.c | 2 + src/occ_405/gpu/gpu.c | 647 +++++++++++++------------- src/occ_405/gpu/gpu.h | 24 +- src/occ_405/occbuildname.c | 2 +- src/occ_405/rtls/rtls.h | 6 +- src/occ_405/rtls/rtls_tables.c | 25 +- src/occ_gpe0/gpe0_main.c | 6 - src/occ_gpe1/ipc_func_tables.c | 5 + 9 files changed, 379 insertions(+), 365 deletions(-) diff --git a/src/common/gpu_structs.h b/src/common/gpu_structs.h index 03c8e069..7933adbd 100644 --- a/src/common/gpu_structs.h +++ b/src/common/gpu_structs.h @@ -57,18 +57,19 @@ typedef enum typedef enum { GPU_REQ_INIT = 0x01, // Init interrupt registers - GPU_REQ_READ_CAPS_START = 0x02, // Start reading capabilities - GPU_REQ_READ_CAPS_STOP = 0x03, - GPU_REQ_READ_CAPS = 0x04, - GPU_REQ_READ_TEMP_SIMPLE_START = 0x05, // Start reading GPU information - GPU_REQ_READ_TEMP_SIMPLE_STOP = 0x06, // Read GPU temp register - GPU_REQ_READ_TEMP_SIMPLE = 0x07, // Start reading GPU temperature - GPU_REQ_READ_TEMP_START = 0x08, // Start reading GPU information - GPU_REQ_READ_TEMP_STOP = 0x09, // Read GPU temp register - GPU_REQ_READ_TEMP = 0x0A, // Start reading GPU temperature - GPU_REQ_READ_PWR_LIMIT_START = 0x0B, // Start reading GPU information - GPU_REQ_READ_PWR_LIMIT_STOP = 0x0C, // Read GPU temp register - GPU_REQ_READ_PWR_LIMIT = 0x0D, // Start reading pwr limit + GPU_REQ_READ_TEMP_START = 0x02, // Initiate core temp reading + GPU_REQ_READ_TEMP_FINISH = 0x03, // Get core temp reading + GPU_REQ_READ_MEM_TEMP_START = 0x04, // Initiate memory temp reading + GPU_REQ_READ_MEM_TEMP_2 = 0x05, // mem temp step 2 + GPU_REQ_READ_MEM_TEMP_3 = 0x06, // mem temp step 3 + GPU_REQ_READ_MEM_TEMP_FINISH = 0x07, // Get memory temp reading + GPU_REQ_READ_CAPS_START = 0x08, // Start reading capabilities + GPU_REQ_READ_CAPS_2 = 0x09, // capabilities step 2 + GPU_REQ_READ_CAPS_3 = 0x0A, // capabilities step 3 + GPU_REQ_READ_CAPS_FINISH = 0x0B, // Get capabilities reading + GPU_REQ_READ_PWR_LIMIT_START = 0x0C, // Initiate power limit reading + GPU_REQ_READ_PWR_LIMIT_STOP = 0x0D, // power limit step 2 + GPU_REQ_READ_PWR_LIMIT = 0x0E, // Get power limit reading GPU_REQ_RESET = 0x60, // Reset } gpu_op_req_e; @@ -78,7 +79,7 @@ typedef struct GpeErrorStruct error; uint8_t gpu_id; uint8_t operation; - uint32_t data[MAX_GPUS]; + uint64_t data; } gpu_sm_args_t; diff --git a/src/occ_405/amec/amec_sensors_power.c b/src/occ_405/amec/amec_sensors_power.c index 38203307..b58a9708 100755 --- a/src/occ_405/amec/amec_sensors_power.c +++ b/src/occ_405/amec/amec_sensors_power.c @@ -75,6 +75,7 @@ extern bool G_vrm_thermal_monitoring; extern PWR_READING_TYPE G_pwr_reading_type; extern bool G_apss_present; extern OCCPstateParmBlock G_oppb; +extern task_t G_task_table[TASK_END]; //*************************************************************************/ // Code @@ -826,6 +827,7 @@ void amec_update_gpu_configuration(void) { // GPUs are present enable monitoring G_gpu_monitoring_allowed = TRUE; + G_task_table[TASK_ID_GPU_SM].flags = GPU_RTL_FLAGS; } G_first_sys_gpu_config = l_valid_bitmask_sys; G_first_num_gpus_sys = l_num_gpus_sys; diff --git 
a/src/occ_405/gpu/gpu.c b/src/occ_405/gpu/gpu.c index 2fd2b825..522ea842 100755 --- a/src/occ_405/gpu/gpu.c +++ b/src/occ_405/gpu/gpu.c @@ -23,7 +23,6 @@ /* */ /* IBM_PROLOG_END_TAG */ -//#define GPU_DEBUG #ifdef GPU_DEBUG #define GPU_DBG(frmt,args...) DBG_PRINT(frmt,##args) #else @@ -335,67 +334,69 @@ void mark_gpu_failed(const gpu_sm_args_t *i_arg) { uint32_t gpu_id = i_arg->gpu_id; - // ignore all errors if haven't reached timeout for comm established - if( (false == g_amec->gpu[gpu_id].status.readOnce) && - (DURATION_IN_S_UNTIL_NOW_FROM(G_gpu_sm_start_time) < GPU_COMM_ESTAB_TIMEOUT_SECONDS) ) - { - // do nothing at this time - return; - } - if((false == g_amec->gpu[gpu_id].status.disabled) && - (true == g_amec->gpu[gpu_id].status.readOnce)) - { - INTR_TRAC_ERR("mark_gpu_failed: GPU%d failed in op/rc/count=0x%06X " - "(ffdc 0x%08X%08X)", - gpu_id, (i_arg->operation << 16) | (i_arg->error.rc << 8) | g_amec->gpu[gpu_id].status.errorCount, - WORD_HIGH(i_arg->error.ffdc), WORD_LOW(i_arg->error.ffdc)); - } - - if( ( ++g_amec->gpu[gpu_id].status.errorCount > MAX_CONSECUTIVE_GPU_RESETS) && - (false == g_amec->gpu[gpu_id].status.disabled) && - (true == g_amec->gpu[gpu_id].status.readOnce)) + do { - G_gpu_state = GPU_STATE_IDLE; - // Disable this GPU, collect FFDC and log error - g_amec->gpu[gpu_id].status.disabled = true; - - INTR_TRAC_ERR("mark_gpu_failed: disabling GPU%d due to %d consecutive errors (op=%d)", - gpu_id, g_amec->gpu[gpu_id].status.errorCount, i_arg->operation); - errlHndl_t l_err = NULL; - /* - * @errortype - * @moduleid GPU_MID_MARK_GPU_FAILED - * @reasoncode GPU_FAILURE - * @userdata1 GPE returned rc code - * @userdata4 ERC_GPU_COMPLETE_FAILURE - * @devdesc GPU failure - */ - l_err = createErrl(GPU_MID_MARK_GPU_FAILED, - GPU_FAILURE, - ERC_GPU_COMPLETE_FAILURE, - ERRL_SEV_PREDICTIVE, - NULL, - DEFAULT_TRACE_SIZE, - i_arg->error.rc, - 0); - - addUsrDtlsToErrl(l_err, - (uint8_t*)&i_arg->error.ffdc, - sizeof(i_arg->error.ffdc), - ERRL_STRUCT_VERSION_1, - ERRL_USR_DTL_BINARY_DATA); - - // Callout the GPU if have sensor ID for it - if(G_sysConfigData.gpu_sensor_ids[gpu_id]) + // ignore all errors if haven't reached timeout for comm established + if( (false == g_amec->gpu[gpu_id].status.readOnce) && + (DURATION_IN_S_UNTIL_NOW_FROM(G_gpu_sm_start_time) < GPU_COMM_ESTAB_TIMEOUT_SECONDS) ) { - addCalloutToErrl(l_err, - ERRL_CALLOUT_TYPE_HUID, - G_sysConfigData.gpu_sensor_ids[gpu_id], - ERRL_CALLOUT_PRIORITY_MED); + // do nothing but reset at this time + break; + } + if((false == g_amec->gpu[gpu_id].status.disabled) && + (true == g_amec->gpu[gpu_id].status.readOnce)) + { + INTR_TRAC_ERR("mark_gpu_failed: GPU%d failed in op/rc/count=0x%06X " + "(ffdc 0x%08X%08X)", + gpu_id, (i_arg->operation << 16) | (i_arg->error.rc << 8) | g_amec->gpu[gpu_id].status.errorCount, + WORD_HIGH(i_arg->error.ffdc), WORD_LOW(i_arg->error.ffdc)); } - commitErrl(&l_err); - } + if( ( ++g_amec->gpu[gpu_id].status.errorCount > MAX_CONSECUTIVE_GPU_RESETS) && + (false == g_amec->gpu[gpu_id].status.disabled) && + (true == g_amec->gpu[gpu_id].status.readOnce)) + { + G_gpu_state = GPU_STATE_IDLE; + // Disable this GPU, collect FFDC and log error + g_amec->gpu[gpu_id].status.disabled = true; + + INTR_TRAC_ERR("mark_gpu_failed: disabling GPU%d due to %d consecutive errors (op=%d)", + gpu_id, g_amec->gpu[gpu_id].status.errorCount, i_arg->operation); + errlHndl_t l_err = NULL; + /* + * @errortype + * @moduleid GPU_MID_MARK_GPU_FAILED + * @reasoncode GPU_FAILURE + * @userdata1 GPE returned rc code + * @userdata4 
ERC_GPU_COMPLETE_FAILURE + * @devdesc GPU failure + */ + l_err = createErrl(GPU_MID_MARK_GPU_FAILED, + GPU_FAILURE, + ERC_GPU_COMPLETE_FAILURE, + ERRL_SEV_PREDICTIVE, + NULL, + DEFAULT_TRACE_SIZE, + i_arg->error.rc, + 0); + addUsrDtlsToErrl(l_err, + (uint8_t*)&i_arg->error.ffdc, + sizeof(i_arg->error.ffdc), + ERRL_STRUCT_VERSION_1, + ERRL_USR_DTL_BINARY_DATA); + + // Callout the GPU if have sensor ID for it + if(G_sysConfigData.gpu_sensor_ids[gpu_id]) + { + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.gpu_sensor_ids[gpu_id], + ERRL_CALLOUT_PRIORITY_MED); + } + + commitErrl(&l_err); + } + } while(0); // Reset GPU G_gpu_i2c_reset_required = true; @@ -428,20 +429,21 @@ bool schedule_gpu_req(const gpu_op_req_e i_operation, gpu_sm_args_t i_new_args) // Read GPU memory temp capability case GPU_REQ_READ_CAPS_START: - case GPU_REQ_READ_CAPS_STOP: - case GPU_REQ_READ_CAPS: + case GPU_REQ_READ_CAPS_2: + case GPU_REQ_READ_CAPS_3: + case GPU_REQ_READ_CAPS_FINISH: break; - // Read GPU memory temp + // Read GPU core temp case GPU_REQ_READ_TEMP_START: - case GPU_REQ_READ_TEMP_STOP: - case GPU_REQ_READ_TEMP: + case GPU_REQ_READ_TEMP_FINISH: break; - // Read GPU core temp - case GPU_REQ_READ_TEMP_SIMPLE_START: - case GPU_REQ_READ_TEMP_SIMPLE_STOP: - case GPU_REQ_READ_TEMP_SIMPLE: + // Read GPU memory temp + case GPU_REQ_READ_MEM_TEMP_START: + case GPU_REQ_READ_MEM_TEMP_2: + case GPU_REQ_READ_MEM_TEMP_3: + case GPU_REQ_READ_MEM_TEMP_FINISH: break; // I2C reset @@ -543,27 +545,27 @@ bool gpu_reset_sm() if (async_request_is_idle(&G_gpu_op_request.request)) { - // check if the previous state was successfully scheduled and success/done - if( (L_reset_state != GPU_RESET_STATE_NEW) && - (L_reset_state != GPU_RESET_STATE_RESET_SLAVE_WAIT) && - (!L_scheduled || (GPE_RC_SUCCESS != G_gpu_op_req_args.error.rc)) ) - { - // Check if failure was due to GPE image not having GPU support - if(G_gpu_op_req_args.error.rc == GPE_RC_NO_GPU_SUPPORT) - { - // No GPU Support, log error and disable all GPUs - INTR_TRAC_ERR("gpu_reset_sm: GPE image doesn't support GPUs!"); + // check if the previous state was successfully scheduled and success/done + if( (L_reset_state != GPU_RESET_STATE_NEW) && + (L_reset_state != GPU_RESET_STATE_RESET_SLAVE_WAIT) && + (!L_scheduled || (GPE_RC_SUCCESS != G_gpu_op_req_args.error.rc)) ) + { + // Check if failure was due to GPE image not having GPU support + if(G_gpu_op_req_args.error.rc == GPE_RC_NO_GPU_SUPPORT) + { + // No GPU Support, log error and disable all GPUs + INTR_TRAC_ERR("gpu_reset_sm: GPE image doesn't support GPUs!"); - /* - * @errortype - * @moduleid GPU_MID_GPU_RESET_SM - * @reasoncode GPU_FAILURE - * @userdata1 0 - * @userdata2 0 - * @userdata4 ERC_GPU_NO_GPE_SUPPORT - * @devdesc GPE1 image doesn't support GPU communication - */ - errlHndl_t err = createErrl(GPU_MID_GPU_RESET_SM, + /* + * @errortype + * @moduleid GPU_MID_GPU_RESET_SM + * @reasoncode GPU_FAILURE + * @userdata1 0 + * @userdata2 0 + * @userdata4 ERC_GPU_NO_GPE_SUPPORT + * @devdesc GPE1 image doesn't support GPU communication + */ + errlHndl_t err = createErrl(GPU_MID_GPU_RESET_SM, GPU_FAILURE, ERC_GPU_NO_GPE_SUPPORT, ERRL_SEV_UNRECOVERABLE, @@ -571,39 +573,41 @@ bool gpu_reset_sm() DEFAULT_TRACE_SIZE, 0, 0); - commitErrl(&err); + commitErrl(&err); - disable_all_gpus(); + disable_all_gpus(); - L_reset_state = GPU_RESET_STATE_NEW; - return FALSE; // GPUs are not ready for communication - } - else - { - // Stay in current state if haven't reached state retry count - if(L_state_retry_count < 
MAX_GPU_RESET_STATE_RETRY) - { - // INC state retry count and retry current state - L_state_retry_count++; - } - else // this reset attempt failed - { - // Stop trying if reached max resets - if(L_consec_reset_failure_count > MAX_CONSECUTIVE_GPU_RESETS) + L_reset_state = GPU_RESET_STATE_NEW; + return FALSE; // GPUs are not ready for communication + } + else + { + // Stay in current state if haven't reached state retry count + if(L_state_retry_count < MAX_GPU_RESET_STATE_RETRY) { - INTR_TRAC_ERR("gpu_reset_sm: Max Resets reached failed at state 0x%02X", - L_reset_state); - - /* - * @errortype - * @moduleid GPU_MID_GPU_RESET_SM - * @reasoncode GPU_FAILURE - * @userdata1 GPU reset state - * @userdata2 0 - * @userdata4 ERC_GPU_RESET_FAILURE - * @devdesc Failure resetting GPU interface - */ - errlHndl_t err = createErrl(GPU_MID_GPU_RESET_SM, + // INC state retry count and retry current state + L_state_retry_count++; + } + else // this reset attempt failed + { + // Stop trying if reached max resets + if( (L_consec_reset_failure_count > MAX_CONSECUTIVE_GPU_RESETS) && + (DURATION_IN_S_UNTIL_NOW_FROM(G_gpu_sm_start_time) >= + GPU_COMM_ESTAB_TIMEOUT_SECONDS)) + { + INTR_TRAC_ERR("gpu_reset_sm: Max Resets reached failed at state 0x%02X", + L_reset_state); + + /* + * @errortype + * @moduleid GPU_MID_GPU_RESET_SM + * @reasoncode GPU_FAILURE + * @userdata1 GPU reset state + * @userdata2 0 + * @userdata4 ERC_GPU_RESET_FAILURE + * @devdesc Failure resetting GPU interface + */ + errlHndl_t err = createErrl(GPU_MID_GPU_RESET_SM, GPU_FAILURE, ERC_GPU_RESET_FAILURE, ERRL_SEV_UNRECOVERABLE, @@ -611,68 +615,64 @@ bool gpu_reset_sm() DEFAULT_TRACE_SIZE, L_reset_state, 0); - commitErrl(&err); - - disable_all_gpus(); - - L_reset_state = GPU_RESET_STATE_NEW; - return FALSE; // GPUs are not ready for communication - } - else // try the reset again from the beginning - { - L_consec_reset_failure_count++; - L_state_retry_count = 0; - L_reset_state = GPU_RESET_STATE_RESET_MASTER; - } - } // else reset attempt failed - } // else GPE supports GPU - }// if previous state failed - else // success on last state go to next state and process it - { - L_state_retry_count = 0; - L_reset_state++; - } + commitErrl(&err); + + disable_all_gpus(); + + L_reset_state = GPU_RESET_STATE_NEW; + return FALSE; // GPUs are not ready for communication + } + else // try the reset again from the beginning + { + L_consec_reset_failure_count++; + L_state_retry_count = 0; + L_reset_state = GPU_RESET_STATE_RESET_MASTER; + } + } // else reset attempt failed + } // else GPE supports GPU + }// if previous state failed + else // success on last state go to next state and process it + { + L_state_retry_count = 0; + L_reset_state++; + } - L_scheduled = FALSE; // default nothing scheduled + L_scheduled = FALSE; // default nothing scheduled - switch (L_reset_state) - { - case GPU_RESET_STATE_RESET_MASTER: - G_new_gpu_req_args.data[0] = GPU_RESET_REQ_MASTER; - L_scheduled = schedule_gpu_req(GPU_REQ_RESET, G_new_gpu_req_args); - break; + switch (L_reset_state) + { + case GPU_RESET_STATE_INIT_BUS: + // Setup I2C Interrupt Mask Register + L_scheduled = schedule_gpu_req(GPU_REQ_INIT, G_new_gpu_req_args); + break; - case GPU_RESET_STATE_RESET_SLAVE: - G_new_gpu_req_args.data[0] = GPU_RESET_REQ_SLV; - L_scheduled = schedule_gpu_req(GPU_REQ_RESET, G_new_gpu_req_args); - break; + case GPU_RESET_STATE_RESET_MASTER: + G_new_gpu_req_args.data = GPU_RESET_REQ_MASTER; + L_scheduled = schedule_gpu_req(GPU_REQ_RESET, G_new_gpu_req_args); + break; - case 
GPU_RESET_STATE_RESET_SLAVE_WAIT: - // Delay to allow reset to complete - GPU_DBG("gpu_reset_sm: waiting during slave port 4 reset"); - break; + case GPU_RESET_STATE_RESET_SLAVE: + G_new_gpu_req_args.data = GPU_RESET_REQ_SLV; + L_scheduled = schedule_gpu_req(GPU_REQ_RESET, G_new_gpu_req_args); + break; - case GPU_RESET_STATE_RESET_SLAVE_COMPLETE: - G_new_gpu_req_args.data[0] = GPU_RESET_REQ_SLV_COMPLETE; - L_scheduled = schedule_gpu_req(GPU_REQ_RESET, G_new_gpu_req_args); - break; + case GPU_RESET_STATE_RESET_SLAVE_WAIT: + // Delay to allow reset to complete + GPU_DBG("gpu_reset_sm: waiting during slave port 4 reset"); + break; - case GPU_RESET_STATE_INIT: - // Notify GPE which GPUs are present - G_new_gpu_req_args.data[0] = (GPU_PRESENT(ID_GPU0)) ? GPU_STATE_PRESENT : 0; - G_new_gpu_req_args.data[1] = (GPU_PRESENT(ID_GPU1)) ? GPU_STATE_PRESENT : 0; - G_new_gpu_req_args.data[2] = (GPU_PRESENT(ID_GPU2)) ? GPU_STATE_PRESENT : 0; - // Setup I2C Interrupt Mask Register and Mode - L_scheduled = schedule_gpu_req(GPU_REQ_INIT, G_new_gpu_req_args); - break; + case GPU_RESET_STATE_RESET_SLAVE_COMPLETE: + G_new_gpu_req_args.data = GPU_RESET_REQ_SLV_COMPLETE; + L_scheduled = schedule_gpu_req(GPU_REQ_RESET, G_new_gpu_req_args); + break; - case GPU_RESET_STATE_INIT_COMPLETE: - // Reset and init is complete ready to start sending commands to the GPUs - l_complete = TRUE; - L_consec_reset_failure_count = 0; - // next time this is called will be to start a new reset - L_reset_state = GPU_RESET_STATE_NEW; - break; + case GPU_RESET_STATE_RESET_FINISH: + // Reset and init is complete ready to start sending commands to the GPUs + l_complete = TRUE; + L_consec_reset_failure_count = 0; + // next time this is called will be to start a new reset + L_reset_state = GPU_RESET_STATE_NEW; + break; default: INTR_TRAC_ERR("gpu_reset_sm: INVALID STATE: 0x%02X when reset is required", L_reset_state); @@ -680,22 +680,22 @@ bool gpu_reset_sm() break; } // switch L_reset_state - if(L_scheduled) - { - GPU_DBG("gpu_reset_sm: Scheduled reset state 0x%02X", L_reset_state); - } - // check if the state was expected to have a schedule. Only new and slave wait - // don't schedule for all other states the schedule must have failed - else if( (L_reset_state != GPU_RESET_STATE_NEW) && - (L_reset_state != GPU_RESET_STATE_RESET_SLAVE_WAIT) ) - { - INTR_TRAC_ERR("gpu_reset_sm: failed to schedule state 0x%02X", L_reset_state); - } + if(L_scheduled) + { + GPU_DBG("gpu_reset_sm: Scheduled reset state 0x%02X", L_reset_state); + } + // check if the state was expected to have a schedule. 
Only new and slave wait + // don't schedule for all other states the schedule must have failed + else if( (L_reset_state != GPU_RESET_STATE_NEW) && + (L_reset_state != GPU_RESET_STATE_RESET_SLAVE_WAIT) ) + { + INTR_TRAC_ERR("gpu_reset_sm: failed to schedule state 0x%02X", L_reset_state); + } } // if async_request_is_idle else { - INTR_TRAC_ERR("gpu_reset_sm: NOT idle for state 0x%02X", L_reset_state); + INTR_TRAC_ERR("gpu_reset_sm: NOT idle for state 0x%02X", L_reset_state); } return l_complete; @@ -709,154 +709,152 @@ bool gpu_reset_sm() // This function should only return that complete is TRUE when the temperature // read is complete (or determined failed) and ready to start reading a different GPU // -// Pre-Req: Caller must have G_current_gpu_id set for GPU to read and -// verified G_gpu_op_request is idle to allow scheduling +// Pre-Req: Caller must have G_current_gpu_id set for GPU to read and +// verified G_gpu_op_request is idle to allow scheduling // End Function Specification bool gpu_read_temp_sm() { bool l_complete = FALSE; // only return TRUE when the read is complete or failed uint16_t l_temp = 0; static bool L_scheduled = FALSE; // indicates if a GPU GPE request was scheduled - static uint8_t L_read_failure_count = 0; + static uint8_t L_read_failure_count = 0; // Used for I2C errors + static gpuReadTempState_e L_read_temp_state = GPU_STATE_READ_TEMP_NEW; // 1st state for reading temp if (async_request_is_idle(&G_gpu_op_request.request)) { - // If not starting a new read then need to check status of current state before moving on - // stay in current state if the schedule failed or the state isn't finished/failed - if( (L_read_temp_state != GPU_STATE_READ_TEMP_NEW) && - (!L_scheduled || (GPE_RC_SUCCESS != G_gpu_op_req_args.error.rc)) ) - { - // If reached retry count give up on this GPU - if(L_read_failure_count > MAX_GPU_READ_ATTEMPT) - { - mark_gpu_failed(&G_gpu_op_req_args); - - L_read_temp_state = GPU_STATE_READ_TEMP_NEW; - return TRUE; // Done with this GPU, let GPU SM move to next - } - else - { - // INC failure count and retry current state - L_read_failure_count++; - } - } - else // success on last state go to next state and process it - { - L_read_failure_count = 0; - L_read_temp_state++; - } - - L_scheduled = FALSE; // default nothing scheduled - - switch (L_read_temp_state) - { - case GPU_STATE_READ_TEMP_START: - L_scheduled = schedule_gpu_req(GPU_REQ_READ_TEMP_SIMPLE_START, G_new_gpu_req_args); - break; - - case GPU_STATE_READ_TEMP_STOP: - L_scheduled = schedule_gpu_req(GPU_REQ_READ_TEMP_SIMPLE_STOP, G_new_gpu_req_args); - break; - - case GPU_STATE_READ_TEMP_READ: - L_scheduled = schedule_gpu_req(GPU_REQ_READ_TEMP_SIMPLE, G_new_gpu_req_args); - break; - - case GPU_STATE_READ_TEMP_COMPLETE: - if( (!g_amec->gpu[G_current_gpu_id].status.readOnce) && - (0 != G_gpu_op_req_args.data[0]) ) // TODO: check for valid temp? 
- { - g_amec->gpu[G_current_gpu_id].status.readOnce = true; - TRAC_INFO("First successful attempt to read temp from GPU%d was on tick %d", - G_current_gpu_id, CURRENT_TICK); - // comm is now established update for capability checking to take place - g_amec->gpu[G_current_gpu_id].status.checkMemTempSupport = TRUE; - g_amec->gpu[G_current_gpu_id].status.checkDriverLoaded = TRUE; - } - // Update sensor - l_temp = G_gpu_op_req_args.data[0] >> 24; - sensor_update(AMECSENSOR_PTR(TEMPGPU0 + G_current_gpu_id), l_temp); - - // Clear all past errors - g_amec->gpu[G_current_gpu_id].status.errorCount = 0; - - // check if there is an overtemp that hasn't been reported - if((G_data_cnfg->thrm_thresh.data[DATA_FRU_GPU].error) && - (l_temp > G_data_cnfg->thrm_thresh.data[DATA_FRU_GPU].error) && - (!g_amec->gpu[G_current_gpu_id].status.overtempError) ) - { - g_amec->gpu[G_current_gpu_id].status.overtempError = TRUE; + // If not starting a new read then need to check status of current state before moving on + // stay in current state if the schedule failed or the state isn't finished/failed + if( (L_read_temp_state != GPU_STATE_READ_TEMP_NEW) && + (!L_scheduled || (GPE_RC_SUCCESS != G_gpu_op_req_args.error.rc)) ) + { + // If reached retry count give up on this GPU + if( (L_read_failure_count > MAX_GPU_READ_ATTEMPT) || + (GPE_RC_I2C_ERROR == G_gpu_op_req_args.error.rc) ) + { + mark_gpu_failed(&G_gpu_op_req_args); - INTR_TRAC_ERR("gpu_read_temp: GPU%d OT! temp[%d]", - G_current_gpu_id, l_temp); + L_read_temp_state = GPU_STATE_READ_TEMP_NEW; + return TRUE; // Done with this GPU, let GPU SM move to next + } + else + { + // INC failure count and retry current state + L_read_failure_count++; + } + } + else // success on last state go to next state and process it + { + L_read_failure_count = 0; + L_read_temp_state++; + } - // Log an OT error - /* @ - * @errortype - * @moduleid GPU_MID_GPU_READ_TEMP - * @reasoncode GPU_ERROR_TEMP - * @userdata1 GPU ID - * @userdata2 GPU memory temperature - * @userdata4 OCC_NO_EXTENDED_RC - * @devdesc GPU memory has reached error temperature - * - */ - errlHndl_t l_err = createErrl(GPU_MID_GPU_READ_TEMP, - GPU_ERROR_TEMP, - OCC_NO_EXTENDED_RC, - ERRL_SEV_PREDICTIVE, - NULL, - DEFAULT_TRACE_SIZE, - G_current_gpu_id, - l_temp); + L_scheduled = FALSE; // default nothing scheduled - // Callout the over temperature procedure - addCalloutToErrl(l_err, - ERRL_CALLOUT_TYPE_COMPONENT_ID, - ERRL_COMPONENT_ID_OVER_TEMPERATURE, - ERRL_CALLOUT_PRIORITY_HIGH); + switch (L_read_temp_state) + { + case GPU_STATE_READ_TEMP_START: + L_scheduled = schedule_gpu_req(GPU_REQ_READ_TEMP_START, G_new_gpu_req_args); + break; - // Callout the GPU if have sensor ID for it - if(G_sysConfigData.gpu_sensor_ids[G_current_gpu_id]) - { - addCalloutToErrl(l_err, - ERRL_CALLOUT_TYPE_HUID, - G_sysConfigData.gpu_sensor_ids[G_current_gpu_id], - ERRL_CALLOUT_PRIORITY_MED); - } + case GPU_STATE_READ_TEMP_FINISH: + L_scheduled = schedule_gpu_req(GPU_REQ_READ_TEMP_FINISH, G_new_gpu_req_args); + break; - // Commit Error - commitErrl(&l_err); + case GPU_STATE_READ_TEMP_COMPLETE: + if( (!g_amec->gpu[G_current_gpu_id].status.readOnce) && + (0 != G_gpu_op_req_args.data) ) // TODO: check for valid temp? 
+ { + g_amec->gpu[G_current_gpu_id].status.readOnce = true; + TRAC_INFO("First successful attempt to read temp from GPU%d was on tick %d", + G_current_gpu_id, CURRENT_TICK); + // comm is now established update for capability checking to take place + g_amec->gpu[G_current_gpu_id].status.checkMemTempSupport = TRUE; + g_amec->gpu[G_current_gpu_id].status.checkDriverLoaded = TRUE; + } + // Update sensor + l_temp = G_gpu_op_req_args.data; + sensor_update(AMECSENSOR_PTR(TEMPGPU0 + G_current_gpu_id), l_temp); - } // if OT error + // Clear all past errors + g_amec->gpu[G_current_gpu_id].status.errorCount = 0; - // Done with this GPU ready to move to new one - L_read_temp_state = GPU_STATE_READ_TEMP_NEW; - l_complete = TRUE; - break; + // check if there is an overtemp that hasn't been reported + if((G_data_cnfg->thrm_thresh.data[DATA_FRU_GPU].error) && + (l_temp > G_data_cnfg->thrm_thresh.data[DATA_FRU_GPU].error) && + (!g_amec->gpu[G_current_gpu_id].status.overtempError) ) + { + g_amec->gpu[G_current_gpu_id].status.overtempError = TRUE; + + INTR_TRAC_ERR("gpu_read_temp: GPU%d OT! temp[%d]", + G_current_gpu_id, l_temp); + + // Log an OT error + /* @ + * @errortype + * @moduleid GPU_MID_GPU_READ_TEMP + * @reasoncode GPU_ERROR_TEMP + * @userdata1 GPU ID + * @userdata2 GPU memory temperature + * @userdata4 OCC_NO_EXTENDED_RC + * @devdesc GPU memory has reached error temperature + * + */ + errlHndl_t l_err = createErrl(GPU_MID_GPU_READ_TEMP, + GPU_ERROR_TEMP, + OCC_NO_EXTENDED_RC, + ERRL_SEV_PREDICTIVE, + NULL, + DEFAULT_TRACE_SIZE, + G_current_gpu_id, + l_temp); + + // Callout the over temperature procedure + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_COMPONENT_ID, + ERRL_COMPONENT_ID_OVER_TEMPERATURE, + ERRL_CALLOUT_PRIORITY_HIGH); + + // Callout the GPU if have sensor ID for it + if(G_sysConfigData.gpu_sensor_ids[G_current_gpu_id]) + { + addCalloutToErrl(l_err, + ERRL_CALLOUT_TYPE_HUID, + G_sysConfigData.gpu_sensor_ids[G_current_gpu_id], + ERRL_CALLOUT_PRIORITY_MED); + } + + // Commit Error + commitErrl(&l_err); + + } // if OT error + + // Done with this GPU ready to move to new one + L_read_temp_state = GPU_STATE_READ_TEMP_NEW; + l_complete = TRUE; + break; - default: - INTR_TRAC_ERR("gpu_read_temp_sm: INVALID STATE: 0x%02X", L_read_temp_state); - L_read_temp_state = GPU_STATE_READ_TEMP_NEW; - l_complete = TRUE; + default: + INTR_TRAC_ERR("gpu_read_temp_sm: INVALID STATE: 0x%02X", L_read_temp_state); + L_read_temp_state = GPU_STATE_READ_TEMP_NEW; + l_complete = TRUE; break; - } // switch L_read_temp_state + } // switch L_read_temp_state - if(L_scheduled) - { - GPU_DBG("gpu_read_temp_sm: Scheduled read temp state 0x%02X at tick %d", - L_read_temp_state, GPU_TICK); - } - else if(!l_complete) // if not complete there must have been a failure on the schedule - { - INTR_TRAC_ERR("gpu_read_temp_sm: failed to schedule state 0x%02X", L_read_temp_state); - } + if(L_scheduled) + { + GPU_DBG("gpu_read_temp_sm: Scheduled read temp state 0x%02X at tick %d", + L_read_temp_state, GPU_TICK); + } + else if(!l_complete) // if not complete there must have been a failure on the schedule + { + INTR_TRAC_ERR("gpu_read_temp_sm: failed to schedule state 0x%02X", L_read_temp_state); + } } // if async_request_is_idle else { - INTR_TRAC_ERR("gpu_read_temp_sm: NOT idle for state 0x%02X", L_read_temp_state); + INTR_TRAC_ERR("gpu_read_temp_sm: NOT idle for state 0x%02X", L_read_temp_state); } return l_complete; @@ -949,18 +947,21 @@ bool gpu_read_mem_temp_capability_sm() L_scheduled = schedule_gpu_req(GPU_REQ_READ_CAPS_START, 
G_new_gpu_req_args); break; - case GPU_STATE_READ_MEM_TEMP_CAPABLE_STOP: - L_scheduled = schedule_gpu_req(GPU_REQ_READ_CAPS_STOP, G_new_gpu_req_args); + case GPU_STATE_READ_MEM_TEMP_CAPABLE_2: + L_scheduled = schedule_gpu_req(GPU_REQ_READ_CAPS_2, G_new_gpu_req_args); + break; + + case GPU_STATE_READ_MEM_TEMP_CAPABLE_3: + L_scheduled = schedule_gpu_req(GPU_REQ_READ_CAPS_3, G_new_gpu_req_args); break; case GPU_STATE_READ_MEM_TEMP_CAPABLE_READ: - L_scheduled = schedule_gpu_req(GPU_REQ_READ_CAPS, G_new_gpu_req_args); + L_scheduled = schedule_gpu_req(GPU_REQ_READ_CAPS_FINISH, G_new_gpu_req_args); break; case GPU_STATE_READ_MEM_TEMP_CAPABLE_COMPLETE: // Update capability - g_amec->gpu[G_current_gpu_id].status.memTempSupported = G_gpu_op_req_args.data[0] & 0x01; - + g_amec->gpu[G_current_gpu_id].status.memTempSupported = G_gpu_op_req_args.data & 0x01; // Done with this GPU ready to move to new one L_read_cap_state = GPU_STATE_READ_MEM_TEMP_CAPABLE_NEW; l_complete = TRUE; @@ -1089,20 +1090,20 @@ bool gpu_read_memory_temp_sm() switch (L_read_temp_state) { case GPU_STATE_READ_MEM_TEMP_START: - L_scheduled = schedule_gpu_req(GPU_REQ_READ_TEMP_START, G_new_gpu_req_args); + L_scheduled = schedule_gpu_req(GPU_REQ_READ_MEM_TEMP_START, G_new_gpu_req_args); break; case GPU_STATE_READ_MEM_TEMP_STOP: - L_scheduled = schedule_gpu_req(GPU_REQ_READ_TEMP_STOP, G_new_gpu_req_args); + L_scheduled = schedule_gpu_req(GPU_REQ_READ_MEM_TEMP_2, G_new_gpu_req_args); break; case GPU_STATE_READ_MEM_TEMP_READ: - L_scheduled = schedule_gpu_req(GPU_REQ_READ_TEMP, G_new_gpu_req_args); + L_scheduled = schedule_gpu_req(GPU_REQ_READ_MEM_TEMP_3, G_new_gpu_req_args); break; case GPU_STATE_READ_MEM_TEMP_COMPLETE: // Update sensor - l_temp = G_gpu_op_req_args.data[0] >> 24; + l_temp = G_gpu_op_req_args.data; sensor_update(AMECSENSOR_PTR(TEMPGPU0MEM + G_current_gpu_id), l_temp); // Clear past errors @@ -1206,6 +1207,8 @@ bool gpu_sm_handle_idle_state(bool i_read_temp_start_needed, bool i_mem_temp_nee { // Check for next state in order of priority +//TODO: Enable when functional +#if 0 // 1. Need to set a power limit on a GPU? l_gpu_id = gpu_id_need_set_power_limit(); if(l_gpu_id != 0xFF) @@ -1216,6 +1219,7 @@ bool gpu_sm_handle_idle_state(bool i_read_temp_start_needed, bool i_mem_temp_nee l_new_state = TRUE; break; } +#endif // 2. check if Host needs lock if (!check_and_update_i2c_lock(GPU_I2C_ENGINE)) @@ -1227,6 +1231,8 @@ bool gpu_sm_handle_idle_state(bool i_read_temp_start_needed, bool i_mem_temp_nee break; } +//TODO: Enable when functional +#if 0 // 3. Need to check if driver is loaded? l_gpu_id = gpu_id_need_driver_check(); if(l_gpu_id != 0xFF) @@ -1276,6 +1282,7 @@ bool gpu_sm_handle_idle_state(bool i_read_temp_start_needed, bool i_mem_temp_nee } } } +#endif // 6. Time to start new temperature reads? 
if(i_read_temp_start_needed) @@ -1332,6 +1339,7 @@ void task_gpu_sm(struct task *i_self) // are functional or GPU I2C interface is broken if(G_gpu_monitoring_allowed) { + // Initialize the IPC commands if this is our first run if(L_gpu_first_run) { @@ -1341,7 +1349,7 @@ void task_gpu_sm(struct task *i_self) } // Check if time to start reading temperatures - // GPU tempertures (core and memory) are only used for fan control which happens every 1s + // GPU temperatures (core and memory) are only used for fan control which happens every 1s // so there is no need to read the GPU temperatures any faster than every 1s if(!L_read_temp_start_needed) { @@ -1355,6 +1363,7 @@ void task_gpu_sm(struct task *i_self) // make sure OCC owns the lock in order to send commands to the GPU if( (L_occ_owns_lock == FALSE) || (G_gpu_state == GPU_STATE_NO_LOCK) ) { + // Check if host gave up the I2C lock L_occ_owns_lock = check_and_update_i2c_lock(GPU_I2C_ENGINE); if (L_occ_owns_lock) @@ -1557,4 +1566,6 @@ void task_gpu_sm(struct task *i_self) } }while((l_start_next_state) && (!l_next_state)); } // GPU monitoring enabled + + } // end task_gpu_sm() diff --git a/src/occ_405/gpu/gpu.h b/src/occ_405/gpu/gpu.h index 91d081b9..c8f13ffc 100644 --- a/src/occ_405/gpu/gpu.h +++ b/src/occ_405/gpu/gpu.h @@ -52,12 +52,12 @@ typedef enum typedef enum { GPU_RESET_STATE_NEW = 0x01, // new reset attempt - GPU_RESET_STATE_RESET_MASTER = 0x02, // Reset master - GPU_RESET_STATE_RESET_SLAVE = 0x03, // Start of slave port 4 reset - GPU_RESET_STATE_RESET_SLAVE_WAIT = 0x04, - GPU_RESET_STATE_RESET_SLAVE_COMPLETE = 0x05, - GPU_RESET_STATE_INIT = 0x06, - GPU_RESET_STATE_INIT_COMPLETE = 0x07, + GPU_RESET_STATE_INIT_BUS = 0x02, + GPU_RESET_STATE_RESET_MASTER = 0x03, // Reset master + GPU_RESET_STATE_RESET_SLAVE = 0x04, // Start of slave port 4 reset + GPU_RESET_STATE_RESET_SLAVE_WAIT = 0x05, + GPU_RESET_STATE_RESET_SLAVE_COMPLETE = 0x06, + GPU_RESET_STATE_RESET_FINISH = 0x07, } gpuResetState_e; // States for reading GPU core temperature (gpu_read_temp_sm) @@ -65,9 +65,8 @@ typedef enum { GPU_STATE_READ_TEMP_NEW = 0x11, // new temp read GPU_STATE_READ_TEMP_START = 0x12, // start write temp reg - GPU_STATE_READ_TEMP_STOP = 0x13, // stop write/begin read - GPU_STATE_READ_TEMP_READ = 0x14, // read temperature - GPU_STATE_READ_TEMP_COMPLETE = 0x15, // store temperature read + GPU_STATE_READ_TEMP_FINISH = 0x13, // read temperature + GPU_STATE_READ_TEMP_COMPLETE = 0x14, // store temperature read } gpuReadTempState_e; // States for reading GPU memory temperature (gpu_read_mem_temp_sm) @@ -85,9 +84,10 @@ typedef enum { GPU_STATE_READ_MEM_TEMP_CAPABLE_NEW = 0x31, GPU_STATE_READ_MEM_TEMP_CAPABLE_START = 0x32, - GPU_STATE_READ_MEM_TEMP_CAPABLE_STOP = 0x33, - GPU_STATE_READ_MEM_TEMP_CAPABLE_READ = 0x34, - GPU_STATE_READ_MEM_TEMP_CAPABLE_COMPLETE = 0x35, + GPU_STATE_READ_MEM_TEMP_CAPABLE_2 = 0x33, + GPU_STATE_READ_MEM_TEMP_CAPABLE_3 = 0x34, + GPU_STATE_READ_MEM_TEMP_CAPABLE_READ = 0x35, + GPU_STATE_READ_MEM_TEMP_CAPABLE_COMPLETE = 0x36, } gpuReadMemTempCapableState_e; // GPU IPC initialization diff --git a/src/occ_405/occbuildname.c b/src/occ_405/occbuildname.c index c6044123..eddf8555 100755 --- a/src/occ_405/occbuildname.c +++ b/src/occ_405/occbuildname.c @@ -34,6 +34,6 @@ volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = #else -volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = /**/ "op_occ_170816a\0" /**/ ; +volatile const char G_occ_buildname[16] __attribute__((section(".buildname"))) = 
/**/ "op_occ_170822a\0" /**/ ; #endif diff --git a/src/occ_405/rtls/rtls.h b/src/occ_405/rtls/rtls.h index 899d44a4..973bab4c 100755 --- a/src/occ_405/rtls/rtls.h +++ b/src/occ_405/rtls/rtls.h @@ -56,7 +56,7 @@ typedef enum { TASK_ID_AMEC_SLAVE, // AMEC SMH tasks TASK_ID_AMEC_MASTER, // AMEC SMH tasks TASK_ID_CORE_DATA_CONTROL, -// TASK_ID_GPU_SM, // GPU State Machine TODO RTC: 133824 + TASK_ID_GPU_SM, // GPU State Machine TASK_ID_DIMM_SM, // DIMM State Machine TASK_ID_MEMORY_CONTROL, // Memory (centaur/dimm) control task TASK_ID_NEST_DTS, @@ -109,6 +109,10 @@ typedef struct RTL_FLAG_ACTIVE | RTL_FLAG_MSTR_READY | RTL_FLAG_NO_APSS | \ RTL_FLAG_RUN | RTL_FLAG_APSS_NOT_INITD) +#define GPU_RTL_FLAGS (RTL_FLAG_MSTR | RTL_FLAG_NOTMSTR | RTL_FLAG_OBS | RTL_FLAG_ACTIVE | \ + RTL_FLAG_MSTR_READY | RTL_FLAG_NO_APSS | RTL_FLAG_RUN | \ + RTL_FLAG_APSS_NOT_INITD) + // Tick Timer definitions: #define MICS_PER_TICK G_mics_per_tick // Number of micro-seconds per tick #define MAX_NUM_TICKS 16 // Number of entries in the global tick table (power of 2) diff --git a/src/occ_405/rtls/rtls_tables.c b/src/occ_405/rtls/rtls_tables.c index eb439cfd..c23e1eaa 100755 --- a/src/occ_405/rtls/rtls_tables.c +++ b/src/occ_405/rtls/rtls_tables.c @@ -38,6 +38,7 @@ #include "dimm.h" #include #include "sensor_get_tod_task.h" // For task_get_tod() +#include "gpu.h" //flags for task table #define APSS_TASK_FLAGS RTL_FLAG_MSTR | RTL_FLAG_OBS | RTL_FLAG_ACTIVE | RTL_FLAG_MSTR_READY | RTL_FLAG_RUN @@ -64,6 +65,7 @@ #define FLAGS_AMEC_MASTER RTL_FLAG_MSTR | RTL_FLAG_OBS | RTL_FLAG_ACTIVE | RTL_FLAG_MSTR_READY | RTL_FLAG_NO_APSS | RTL_FLAG_RUN | RTL_FLAG_APSS_NOT_INITD #define FLAGS_24X7 RTL_FLAG_MSTR | RTL_FLAG_NOTMSTR | RTL_FLAG_OBS | RTL_FLAG_ACTIVE | RTL_FLAG_MSTR_READY | RTL_FLAG_NO_APSS | RTL_FLAG_RUN | RTL_FLAG_APSS_NOT_INITD +#define FLAGS_GPU_SM RTL_FLAG_NONE #define FLAGS_GET_TOD RTL_FLAG_MSTR | RTL_FLAG_NOTMSTR | RTL_FLAG_OBS | RTL_FLAG_ACTIVE | RTL_FLAG_MSTR_READY | RTL_FLAG_NO_APSS | RTL_FLAG_RUN | RTL_FLAG_APSS_NOT_INITD @@ -82,10 +84,6 @@ #define FLAGS_GPE_TIMINGS RTL_FLAG_MSTR | RTL_FLAG_NOTMSTR | RTL_FLAG_OBS | RTL_FLAG_ACTIVE | RTL_FLAG_MSTR_READY | RTL_FLAG_NO_APSS | RTL_FLAG_RUN | RTL_FLAG_APSS_NOT_INITD -// TEMP/TODO RTC: 133824 - New GPU interface via main memory and SMBUS -#define FLAGS_GPU_SM - - // Global tick sequences // The number and size of these will vary as the real tick sequences are developed over time. 
@@ -122,8 +120,7 @@ task_t G_task_table[TASK_END] = { { FLAGS_AMEC_SLAVE, task_amec_slave, NULL }, // TASK_ID_AMEC_SLAVE { FLAGS_AMEC_MASTER, task_amec_master, NULL }, // TASK_ID_AMEC_MASTER { FLAGS_CORE_DATA_CONTROL, task_core_data_control, NULL }, // TASK_ID_CORE_DATA_CONTROL -// TODO RTC: 133824 - New GPU interface via main memory and SMBUS -// { FLAGS_GPU_SM, task_gpu_sm, NULL }, // TASK_ID_GPU_SM + { FLAGS_GPU_SM, task_gpu_sm, NULL }, // TASK_ID_GPU_SM { FLAGS_MEMORY_DATA, task_dimm_sm, NULL }, // TASK_ID_DIMM_SM { FLAGS_MEMORY_CONTROL, task_memory_control, (void *) &G_memory_control_task }, // TASK_ID_MEMORY_CONTROL { FLAGS_NEST_DTS, task_nest_dts, NULL }, @@ -159,7 +156,7 @@ const uint8_t G_tick1_seq[] = { TASK_ID_APSS_START, TASK_ID_GET_TOD, TASK_ID_CORE_DATA_LOW, - //TASK_ID_GPU_SM, + TASK_ID_GPU_SM, TASK_ID_APSS_CONT, TASK_ID_APSS_DONE, TASK_ID_MEMORY_CONTROL, @@ -205,7 +202,7 @@ const uint8_t G_tick3_seq[] = { TASK_ID_APSS_START, TASK_ID_GET_TOD, TASK_ID_NEST_DTS, - //TASK_ID_GPU_SM, + TASK_ID_GPU_SM, TASK_ID_APSS_CONT, TASK_ID_CORE_DATA_HIGH, TASK_ID_APSS_DONE, @@ -251,7 +248,7 @@ const uint8_t G_tick5_seq[] = { TASK_ID_APSS_START, TASK_ID_GET_TOD, TASK_ID_CORE_DATA_LOW, - //TASK_ID_GPU_SM, + TASK_ID_GPU_SM, TASK_ID_APSS_CONT, TASK_ID_APSS_DONE, TASK_ID_MEMORY_CONTROL, @@ -296,7 +293,7 @@ const uint8_t G_tick6_seq[] = { const uint8_t G_tick7_seq[] = { TASK_ID_APSS_START, TASK_ID_GET_TOD, - //TASK_ID_GPU_SM, + TASK_ID_GPU_SM, TASK_ID_APSS_CONT, TASK_ID_CORE_DATA_HIGH, TASK_ID_APSS_DONE, @@ -342,7 +339,7 @@ const uint8_t G_tick9_seq[] = { TASK_ID_APSS_START, TASK_ID_GET_TOD, TASK_ID_CORE_DATA_LOW, - //TASK_ID_GPU_SM, + TASK_ID_GPU_SM, TASK_ID_APSS_CONT, TASK_ID_APSS_DONE, TASK_ID_MEMORY_CONTROL, @@ -387,7 +384,7 @@ const uint8_t G_tick10_seq[] = { const uint8_t G_tick11_seq[] = { TASK_ID_APSS_START, TASK_ID_GET_TOD, - //TASK_ID_GPU_SM, + TASK_ID_GPU_SM, TASK_ID_APSS_CONT, TASK_ID_CORE_DATA_HIGH, TASK_ID_APSS_DONE, @@ -433,7 +430,7 @@ const uint8_t G_tick13_seq[] = { TASK_ID_APSS_START, TASK_ID_GET_TOD, TASK_ID_CORE_DATA_LOW, - //TASK_ID_GPU_SM, + TASK_ID_GPU_SM, TASK_ID_APSS_CONT, TASK_ID_APSS_DONE, TASK_ID_MEMORY_CONTROL, @@ -478,7 +475,7 @@ const uint8_t G_tick14_seq[] = { const uint8_t G_tick15_seq[] = { TASK_ID_APSS_START, TASK_ID_GET_TOD, - //TASK_ID_GPU_SM, + TASK_ID_GPU_SM, TASK_ID_APSS_CONT, TASK_ID_CORE_DATA_HIGH, TASK_ID_APSS_DONE, diff --git a/src/occ_gpe0/gpe0_main.c b/src/occ_gpe0/gpe0_main.c index f8094be7..50bf45ce 100644 --- a/src/occ_gpe0/gpe0_main.c +++ b/src/occ_gpe0/gpe0_main.c @@ -23,12 +23,6 @@ /* */ /* IBM_PROLOG_END_TAG */ -/// \file gpe0_main.c -/// \brief Sample program that creates and starts a thread -/// -/// This file demonstrates how to create a thread and run it. It also provides -/// an example of how to add traces to the code. - #include "pk.h" #include "pk_trace.h" #include "ipc_api.h" diff --git a/src/occ_gpe1/ipc_func_tables.c b/src/occ_gpe1/ipc_func_tables.c index d694e3e3..ea1d6f81 100644 --- a/src/occ_gpe1/ipc_func_tables.c +++ b/src/occ_gpe1/ipc_func_tables.c @@ -32,6 +32,10 @@ void gpe1_nop(ipc_msg_t* cmd, void* arg); void gpe_reset_mem_deadman(ipc_msg_t* cmd, void* arg); void gpe_24x7(ipc_msg_t* cmd, void* arg); void gpe_mem_power_control(ipc_msg_t* cmd, void* arg); + +#ifdef OCC_GPU_SUPPORT +void gpe_gpu_sm(ipc_msg_t* cmd, void* arg); +#else void gpe_gpu_sm(ipc_msg_t* cmd, void* arg) { // No GPU support. 
The 405 should only be calling this on OCC GPU supported @@ -55,6 +59,7 @@ void gpe_gpu_sm(ipc_msg_t* cmd, void* arg) pk_halt(); } } +#endif // Function table for multi target (common) functions IPC_MT_FUNC_TABLE_START
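Usage sketch for the reworked request arguments: with this change the 405 hands GPE1 a gpu_sm_args_t whose single 64-bit data field carries the operation result, and a core-temperature read is split into a START/FINISH request pair. The fragment below is a simplified, self-contained illustration of that flow only, not code from this patch: gpu_ipc_send_and_wait() is a hypothetical blocking stand-in for scheduling the asynchronous G_gpu_op_request and letting GPE1's gpe_gpu_sm handler complete it, and the struct definitions are trimmed local mirrors of the ones in gpu_structs.h so the sketch compiles on its own. The real gpu_read_temp_sm() in gpu.c drives the same sequence one state per RTL tick rather than blocking.

// Simplified illustration (not patch code): read one GPU's core temperature
// using the two-phase request pair and the new 64-bit data field.
// gpu_ipc_send_and_wait() is a hypothetical stand-in for the real async
// G_gpu_op_request scheduling; the structs are trimmed mirrors of
// gpu_structs.h so this sketch is self-contained.

#include <stdbool.h>
#include <stdint.h>

typedef struct
{
    uint32_t rc;          // GPE return code (GPE_RC_SUCCESS on success)
    uint64_t ffdc;        // first-failure data capture
} GpeErrorStruct;

typedef struct
{
    GpeErrorStruct error;
    uint8_t  gpu_id;      // which GPU the operation targets
    uint8_t  operation;   // gpu_op_req_e value
    uint64_t data;        // operation result (e.g. the core temp reading)
} gpu_sm_args_t;

enum { GPU_REQ_READ_TEMP_START = 0x02, GPU_REQ_READ_TEMP_FINISH = 0x03 };
enum { GPE_RC_SUCCESS = 0 };

// Hypothetical helper: send one request to GPE1 and wait for it to finish.
bool gpu_ipc_send_and_wait(gpu_sm_args_t *io_args);

// Returns true and fills o_temp when both phases complete successfully.
bool read_gpu_core_temp(uint8_t i_gpu_id, uint16_t *o_temp)
{
    gpu_sm_args_t l_args = { .gpu_id = i_gpu_id };

    // Phase 1: start the SMBUS transaction that targets the temp register
    l_args.operation = GPU_REQ_READ_TEMP_START;
    if (!gpu_ipc_send_and_wait(&l_args) || (l_args.error.rc != GPE_RC_SUCCESS))
    {
        return false;
    }

    // Phase 2: finish the read; GPE1 returns the temperature in data
    l_args.operation = GPU_REQ_READ_TEMP_FINISH;
    if (!gpu_ipc_send_and_wait(&l_args) || (l_args.error.rc != GPE_RC_SUCCESS))
    {
        return false;
    }

    *o_temp = (uint16_t)l_args.data;
    return true;
}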