From 5d844b1ebedbf831b9c9db6a6aa990604f859293 Mon Sep 17 00:00:00 2001 From: Matthew Kennedy Date: Thu, 7 Jul 2022 20:49:02 -0700 Subject: [PATCH] Error handling cleanup (#4332) * dead code in stm32 * rusefi.cpp can have some noreturn * handle lua panic * unused error codes * simplifications of error_handling.cpp * comment * put that back * guard --- firmware/controllers/algo/obd_error_codes.h | 4 +- firmware/controllers/core/error_handling.cpp | 45 +++--- firmware/controllers/lua/lua.cpp | 9 ++ .../hw_layer/ports/stm32/stm32_common.cpp | 137 ------------------ firmware/rusefi.cpp | 5 +- firmware/rusefi.h | 4 +- 6 files changed, 34 insertions(+), 170 deletions(-) diff --git a/firmware/controllers/algo/obd_error_codes.h b/firmware/controllers/algo/obd_error_codes.h index fc47a4e18ca..15bb6a1dfda 100644 --- a/firmware/controllers/algo/obd_error_codes.h +++ b/firmware/controllers/algo/obd_error_codes.h @@ -1953,8 +1953,8 @@ typedef enum { CUSTOM_ERR_6605 = 6605, CUSTOM_ERR_6606 = 6606, CUSTOM_APPEND_STACK = 6607, - CUSTOM_RM_STACK_1 = 6608, - CUSTOM_RM_STACK = 6609, + CUSTOM_ERR_6608 = 6608, + CUSTOM_ERR_6609 = 6609, CUSTOM_ERR_6610 = 6610, CUSTOM_ERR_6611 = 6611, diff --git a/firmware/controllers/core/error_handling.cpp b/firmware/controllers/core/error_handling.cpp index dee55c1eb57..4b72299a358 100644 --- a/firmware/controllers/core/error_handling.cpp +++ b/firmware/controllers/core/error_handling.cpp @@ -66,6 +66,7 @@ void checkLastBootError() { void logHardFault(uint32_t type, uintptr_t faultAddress, port_extctx* ctx, uint32_t csfr) { auto sramState = getBackupSram(); sramState->Cookie = ErrorCookie::HardFault; + sramState->FaultType = type; sramState->FaultAddress = faultAddress; sramState->Csfr = csfr; memcpy(&sramState->FaultCtx, ctx, sizeof(port_extctx)); @@ -74,15 +75,6 @@ void logHardFault(uint32_t type, uintptr_t faultAddress, port_extctx* ctx, uint3 extern ioportid_t criticalErrorLedPort; extern ioportmask_t criticalErrorLedPin; extern uint8_t criticalErrorLedState; - -/** - * low-level function is used here to reduce stack usage - */ - -#define ON_CRITICAL_ERROR() \ - palWritePad(criticalErrorLedPort, criticalErrorLedPin, criticalErrorLedState); \ - turnAllPinsOff(); \ - enginePins.communicationLedPin.setValue(1); #endif /* EFI_PROD_CODE */ #if EFI_SIMULATOR || EFI_PROD_CODE @@ -96,12 +88,10 @@ void chDbgPanic3(const char *msg, const char * file, int line) { ch.dbg.panic_msg = msg; #endif /* CH_DBG_SYSTEM_STATE_CHECK */ -#if EFI_PROD_CODE - ON_CRITICAL_ERROR(); -#else +#if !EFI_PROD_CODE printf("chDbgPanic3 %s %s%d", msg, file, line); exit(-1); -#endif +#else // EFI_PROD_CODE #if EFI_HD44780_LCD lcdShowPanicMessage((char *) msg); @@ -109,20 +99,24 @@ void chDbgPanic3(const char *msg, const char * file, int line) { firmwareError(OBD_PCM_Processor_Fault, "assert fail %s %s:%d", msg, file, line); - // Force unlock, since we may be throwing-under-lock - chSysUnconditionalUnlock(); - - // there was a port_disable in chSysHalt, reenable interrupts so USB works - port_enable(); - // If on the main thread, longjmp back to the init process so we can keep USB alive if (chThdGetSelfX()->threadId == 0) { - void onAssertionFailure(); + // Force unlock, since we may be throwing-under-lock + chSysUnconditionalUnlock(); + + // there was a port_disable in chSysHalt, reenable interrupts so USB works + port_enable(); + + __NO_RETURN void onAssertionFailure(); onAssertionFailure(); } else { - // Not the main thread, simply try to terminate ourselves and let other threads continue living (so the user can diagnose, etc) - chThdTerminate(chThdGetSelfX()); + // Not the main thread. + // All hope is now lost. + // Reboot! + NVIC_SystemReset(); } + +#endif // EFI_PROD_CODE } #else @@ -244,8 +238,10 @@ void firmwareError(obd_code_e code, const char *fmt, ...) { chvsnprintf(warningBuffer, sizeof(warningBuffer), fmt, ap); va_end(ap); #endif - ON_CRITICAL_ERROR() - ; + palWritePad(criticalErrorLedPort, criticalErrorLedPin, criticalErrorLedState); + turnAllPinsOff(); + enginePins.communicationLedPin.setValue(1); + hasFirmwareErrorFlag = true; if (indexOf(fmt, '%') == -1) { /** @@ -288,4 +284,3 @@ void firmwareError(obd_code_e code, const char *fmt, ...) { #endif /* EFI_SIMULATOR */ #endif } - diff --git a/firmware/controllers/lua/lua.cpp b/firmware/controllers/lua/lua.cpp index d18404107ae..8bb19ae6a5d 100644 --- a/firmware/controllers/lua/lua.cpp +++ b/firmware/controllers/lua/lua.cpp @@ -149,6 +149,15 @@ static LuaHandle setupLuaState(lua_Alloc alloc) { return nullptr; } + lua_atpanic(ls, [](lua_State* l) { + firmwareError(OBD_PCM_Processor_Fault, "Lua panic: %s", lua_tostring(l, -1)); + + // hang the lua thread + while (true) ; + + return 0; + }); + // Load Lua's own libraries loadLibraries(ls); diff --git a/firmware/hw_layer/ports/stm32/stm32_common.cpp b/firmware/hw_layer/ports/stm32/stm32_common.cpp index 087fe6cdd68..4030ee8a713 100644 --- a/firmware/hw_layer/ports/stm32/stm32_common.cpp +++ b/firmware/hw_layer/ports/stm32/stm32_common.cpp @@ -406,19 +406,11 @@ void baseMCUInit(void) { BOR_Set(BOR_Level_1); // one step above default value } - -extern "C" { -void prvGetRegistersFromStack(uint32_t *pulFaultStackAddress); -} - extern uint32_t __main_stack_base__; -#define GET_CFSR() (*((volatile uint32_t *) (0xE000ED28))) - typedef struct port_intctx intctx_t; EXTERNC int getRemainingStack(thread_t *otp) { - #if CH_DBG_ENABLE_STACK_CHECK // this would dismiss coverity warning - see http://rusefi.com/forum/viewtopic.php?f=5&t=655 // coverity[uninit_use] @@ -440,135 +432,6 @@ EXTERNC int getRemainingStack(thread_t *otp) { #endif /* CH_DBG_ENABLE_STACK_CHECK */ } -void _unhandled_exception(void) { -/*lint -restore*/ - - chDbgPanic3("_unhandled_exception", __FILE__, __LINE__); - while (true) { - } -} - -void DebugMonitorVector(void) { - chDbgPanic3("DebugMonitorVector", __FILE__, __LINE__); - while (TRUE) - ; -} - -void UsageFaultVector(void) { - chDbgPanic3("UsageFaultVector", __FILE__, __LINE__); - while (TRUE) - ; -} - -void BusFaultVector(void) { - chDbgPanic3("BusFaultVector", __FILE__, __LINE__); - while (TRUE) { - } -} - -/** - + * @brief Register values for postmortem debugging. - + */ -volatile uint32_t postmortem_r0; -volatile uint32_t postmortem_r1; -volatile uint32_t postmortem_r2; -volatile uint32_t postmortem_r3; -volatile uint32_t postmortem_r12; -volatile uint32_t postmortem_lr; /* Link register. */ -volatile uint32_t postmortem_pc; /* Program counter. */ -volatile uint32_t postmortem_psr;/* Program status register. */ -volatile uint32_t postmortem_CFSR; -volatile uint32_t postmortem_HFSR; -volatile uint32_t postmortem_DFSR; -volatile uint32_t postmortem_AFSR; -volatile uint32_t postmortem_BFAR; -volatile uint32_t postmortem_MMAR; -volatile uint32_t postmortem_SCB_SHCSR; - -/** - * @brief Evaluates to TRUE if system runs under debugger control. - * @note This bit resets only by power reset. - */ -#define is_under_debugger() (((CoreDebug)->DHCSR) & \ - CoreDebug_DHCSR_C_DEBUGEN_Msk) - -/** - * - */ -void prvGetRegistersFromStack(uint32_t *pulFaultStackAddress) { - - postmortem_r0 = pulFaultStackAddress[0]; - postmortem_r1 = pulFaultStackAddress[1]; - postmortem_r2 = pulFaultStackAddress[2]; - postmortem_r3 = pulFaultStackAddress[3]; - postmortem_r12 = pulFaultStackAddress[4]; - postmortem_lr = pulFaultStackAddress[5]; - postmortem_pc = pulFaultStackAddress[6]; - postmortem_psr = pulFaultStackAddress[7]; - - /* Configurable Fault Status Register. Consists of MMSR, BFSR and UFSR */ - postmortem_CFSR = GET_CFSR(); - - /* Hard Fault Status Register */ - postmortem_HFSR = (*((volatile uint32_t *) (0xE000ED2C))); - - /* Debug Fault Status Register */ - postmortem_DFSR = (*((volatile uint32_t *) (0xE000ED30))); - - /* Auxiliary Fault Status Register */ - postmortem_AFSR = (*((volatile uint32_t *) (0xE000ED3C))); - - /* Read the Fault Address Registers. These may not contain valid values. - Check BFARVALID/MMARVALID to see if they are valid values - MemManage Fault Address Register */ - postmortem_MMAR = (*((volatile uint32_t *) (0xE000ED34))); - /* Bus Fault Address Register */ - postmortem_BFAR = (*((volatile uint32_t *) (0xE000ED38))); - - postmortem_SCB_SHCSR = SCB->SHCSR; - - if (is_under_debugger()) { - __asm("BKPT #0\n"); - // Break into the debugger - } - - /* harmless infinite loop */ - while (1) { - ; - } -} - -void HardFaultVector(void) { -#if 0 && defined __GNUC__ - __asm volatile ( - " tst lr, #4 \n" - " ite eq \n" - " mrseq r0, msp \n" - " mrsne r0, psp \n" - " ldr r1, [r0, #24] \n" - " ldr r2, handler2_address_const \n" - " bx r2 \n" - " handler2_address_const: .word prvGetRegistersFromStack \n" - ); - -#else -#endif /* 0 && defined __GNUC__ */ - - int cfsr = GET_CFSR(); - if (cfsr & 0x1) { - chDbgPanic3("H IACCVIOL", __FILE__, __LINE__); - } else if (cfsr & 0x100) { - chDbgPanic3("H IBUSERR", __FILE__, __LINE__); - } else if (cfsr & 0x20000) { - chDbgPanic3("H INVSTATE", __FILE__, __LINE__); - } else { - chDbgPanic3("HardFaultVector", __FILE__, __LINE__); - } - - while (TRUE) { - } -} - #if HAL_USE_SPI bool isSpiInitialized[5] = { false, false, false, false, false }; diff --git a/firmware/rusefi.cpp b/firmware/rusefi.cpp index 20277e68f89..34e3222a17b 100644 --- a/firmware/rusefi.cpp +++ b/firmware/rusefi.cpp @@ -170,10 +170,9 @@ void onAssertionFailure() { } void runRusEfiWithConfig(); -void runMainLoop(); +__NO_RETURN void runMainLoop(); void runRusEfi() { - efiAssertVoid(CUSTOM_RM_STACK_1, getCurrentRemainingStack() > 512, "init s"); engine->setConfig(); #if EFI_TEXT_LOGGING @@ -312,8 +311,6 @@ void runMainLoop() { * control is around main_trigger_callback */ while (true) { - efiAssertVoid(CUSTOM_RM_STACK, getCurrentRemainingStack() > 128, "stack#1"); - #if EFI_CLI_SUPPORT && !EFI_UART_ECHO_TEST_MODE // sensor state + all pending messages for our own rusEfi console // todo: is this mostly dead code? diff --git a/firmware/rusefi.h b/firmware/rusefi.h index c4fb86bc48a..9f12590f757 100644 --- a/firmware/rusefi.h +++ b/firmware/rusefi.h @@ -7,5 +7,5 @@ #pragma once -void runRusEfi(); -void rebootNow(); +__NO_RETURN void runRusEfi(); +__NO_RETURN void rebootNow();