Skip to content

Commit

Permalink
Debug improvements for exceptions and OOM hangs
Browse files Browse the repository at this point in the history
There are two main changes in this commit:
1) Forcing a task crash (after a debug break and a printk backtrace) if
pages still cannot be allocated after 10,000 yield-and-retry attempts.
2) Printing a backtrace to the printk buffer on many exception paths.

Change-Id: I755ada753b78abed56e553f7c669f0f98ae68700
Reviewed-on: http://rchgit01.rchland.ibm.com/gerrit1/60691
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
dcrowell77 committed Jul 24, 2018
1 parent 19a4dfe commit f4a736d
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 17 deletions.
5 changes: 3 additions & 2 deletions src/include/arch/ppc.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* Contributors Listed Below - COPYRIGHT 2011,2018 */
/* [+] Google Inc. */
/* [+] International Business Machines Corp. */
/* */
Expand Down Expand Up @@ -434,10 +434,11 @@ ALWAYS_INLINE
/**
 * @brief Emit an `rlwimi` instruction whose operands encode the value _n.
 *
 * The three operands are taken from bit-fields of _n:
 *   - bits 8..12  (masked with 0x1f) -> used as both register fields,
 *   - bits 4..7   (masked with 0xf)  -> first mask operand,
 *   - bits 0..3   (masked with 0xf), OR'd with 16 -> second mask operand.
 * An isync() is issued before the instruction.
 *
 * NOTE(review): with identical source/target register and a rotate of 0
 * this presumably leaves the register unchanged and only acts as a marker
 * that a simulator/debugger can recognize - confirm.
 *
 * @param[in] _n  Value to encode into the instruction operands.
 */
inline void MAGIC_INSTRUCTION(int _n)
{
register int n = _n;
isync();
// All operands use the "i" (immediate) constraint, so each expression has
// to fold to a compile-time constant - presumably why this is force-inlined.
asm volatile("rlwimi %0,%0,0,%1,%2" \
:: "i" (((n) >> 8) & 0x1f), \
"i" (((n) >> 4) & 0xf), \
"i" ((((n) >> 0) & 0xf) | 16));
"i" ((((n) >> 0) & 0xf) | 16)); \
}

// Arguments to MAGIC_INSTRUCTION().
Expand Down
16 changes: 14 additions & 2 deletions src/include/kernel/misc.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2011,2017 */
/* Contributors Listed Below - COPYRIGHT 2011,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -178,7 +178,7 @@ namespace KernelMisc
*/
void populate_cache_lines(uint64_t* i_start, uint64_t* i_end);

/** @fn set
/** @fn set scratch reg
*
* @brief Update value of scratch register to specified data.
*
Expand All @@ -195,5 +195,17 @@ namespace KernelMisc
*/
void updateScratchReg(MMIO_Scratch_Register scratch_addr, uint64_t data);

/** @fn printkBacktrace
*
* @brief Collect the backtrace for the given task and print an
* abbreviated version to the printk buffer.
*
* This can be used in task-crash paths for FFDC
*
* @param[in] i_task - Task to operate against,
* if i_task==nullptr then call will use user-space interfaces
*/
void printkBacktrace(task_t* i_task);

};
#endif
6 changes: 4 additions & 2 deletions src/kernel/exception.C
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include <kernel/terminate.H>
#include <kernel/hbterminatetypes.H>
#include <kernel/kernel_reasoncodes.H>
#include <kernel/misc.H>


namespace ExceptionHandles
Expand All @@ -62,7 +63,8 @@ void kernel_execute_prog_ex()
}
if (!handled)
{
printk("Program exception, killing task %d\n", t->tid);
printk( "Program exception, killing task %d, SRR0=0x%lX, SRR1=0x%lX\n",
t->tid, getSRR0(), getSRR1() );
MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR);
TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
}
Expand Down Expand Up @@ -107,7 +109,7 @@ void kernel_execute_data_storage()
"Exception Type: %lx\n"
"Instruction where it occurred: %p\n",
t->tid, getDAR(), getDSISR(), t->context.nip);
MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR);
KernelMisc::printkBacktrace(t);
TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/kernel/machchk.C
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ void setCheckstopData(uint64_t i_xstopAddr, uint64_t i_xstopData)
g_xstopRegPtr = reinterpret_cast<uint64_t*>(i_xstopAddr
|VmmManager::FORCE_PHYS_ADDR);
g_xstopRegValue = i_xstopData;
printk( "Set MchChk Xstop: %p=%.16lX\n", g_xstopRegPtr, g_xstopRegValue );
printk( "Arm MchChk Xstop: %p=%.16lX\n", g_xstopRegPtr, g_xstopRegValue );

// Now that the machine check handler can do the xscom we
// can set MSR[ME]=1 to enable the regular machine check
Expand Down
45 changes: 45 additions & 0 deletions src/kernel/misc.C
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,51 @@ namespace KernelMisc
writeScratchReg(l_scratch_addr, data);
};

/**
 * @brief Collect the backtrace for the given task and print an
 *        abbreviated version to the printk buffer.
 *
 * Walks the chain of stack frames and, for every frame whose first slot
 * (the link to the next frame) is non-zero, prints the value held in
 * slot [2] of that frame (byte offset 16) when it is non-zero.
 * NOTE(review): slot [2] is presumably the saved link register per the
 * 64-bit PowerPC ABI - confirm.
 *
 * The walk is capped at a fixed number of frames so that a corrupt or
 * cyclic frame chain cannot hang the crash path this is called from.
 *
 * @param[in] i_task  Task to operate against.
 *                    - nullptr: output is prefixed with "U:", the walk
 *                      starts at framePointer(), the tid comes from
 *                      task_gettid(), and frame addresses are dereferenced
 *                      as-is.
 *                    - non-null: output is prefixed with "K:", the walk
 *                      starts at i_task->context.gprs[1], and every frame
 *                      address is first translated through
 *                      VmmManager::findPhysicalAddress().
 */
void printkBacktrace(task_t* i_task)
{
    // Upper bound on the number of frames visited; guards against a
    // damaged back-chain that loops or never reaches a null link.
    constexpr size_t MAX_FRAMES = 128;

    uint64_t* l_frame = nullptr;
    uint32_t l_tid = 0;
    bool l_kernelSpace = true;
    if( i_task == nullptr ) //user-space
    {
        l_kernelSpace = false;
        printk("U:");
        l_frame = static_cast<uint64_t*>(framePointer());
        l_tid = task_gettid();
    }
    else //kernel-space
    {
        printk("K:");
        l_frame = reinterpret_cast<uint64_t*>( i_task->context.gprs[1] );
        l_tid = i_task->tid;
    }

    printk("Backtrace for %d:\n ", l_tid );
    printkd("frame=%p\n",l_frame);isync();
    for( size_t l_depth = 0; l_frame != nullptr; ++l_depth )
    {
        if( l_depth >= MAX_FRAMES )
        {
            // Give up rather than spin forever on a bad frame chain
            printk( "<-..." );
            break;
        }

        printkd("\nf=%p\n",l_frame); isync();
        if( l_kernelSpace )
        {
            // The task's frame address has to be translated before the
            // kernel can read through it.
            // NOTE(review): the translated address is dereferenced without
            // checking for a failed lookup - confirm what
            // findPhysicalAddress() returns on failure.
            uint64_t* frame_p = reinterpret_cast<uint64_t*>
              (VmmManager::findPhysicalAddress( reinterpret_cast<uint64_t>
                                                (l_frame) ));
            printkd("frame_p=%p\n",frame_p); isync();
            l_frame = frame_p;
        }

        // Only report frames that link onward and hold a non-zero entry
        if( (0 != *l_frame) && (0 != l_frame[2]) )
        {
            printk( "<-0x%lX", l_frame[2] );
        }

        // Slot [0] holds the address of the next frame in the chain
        l_frame = reinterpret_cast<uint64_t*>(*l_frame);
    }
    printk("\n");
}


};

Expand Down
12 changes: 8 additions & 4 deletions src/kernel/pagemgr.C
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <kernel/memstate.H>
#include <kernel/bltohbdatamgr.H>
#include <usr/debugpointers.H>
#include <kernel/misc.H>


size_t PageManager::cv_coalesce_count = 0;
Expand Down Expand Up @@ -156,7 +157,7 @@ void* PageManager::allocatePage(size_t n, bool userspace)
// In non-kernel mode, make a system-call to allocate in kernel-mode.
if (!KernelMisc::in_kernel_mode())
{
size_t attempts = 0;
size_t l_attempts = 0;
while (NULL == page)
{
page = _syscall1(Systemcalls::MM_ALLOC_PAGES,
Expand All @@ -166,11 +167,14 @@ void* PageManager::allocatePage(size_t n, bool userspace)
// will eventually free up (ex. VMM flushes).
if (NULL == page)
{
attempts++;
if( attempts == 10000 ) //arbitrarily huge number
l_attempts++;
if( l_attempts == 10000 )
{
printk("Cannot allocate %ld pages\n", n);
printk( "Cannot allocate %ld pages to %d!\n",
n, task_gettid() );
MAGIC_INSTRUCTION(MAGIC_BREAK_ON_ERROR);
KernelMisc::printkBacktrace(nullptr);
task_crash();
}
task_yield();
}
Expand Down
6 changes: 4 additions & 2 deletions src/usr/testcore/kernel/taskwaittest.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* COPYRIGHT International Business Machines Corp. 2011,2014 */
/* Contributors Listed Below - COPYRIGHT 2011,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
Expand Down Expand Up @@ -168,7 +170,7 @@ class TaskWaitTest : public CxxTest::TestSuite

/**
 * @brief Task entry point that deliberately stores through a null pointer.
 *
 * Prints a notice to printk announcing that an uncaught exception is
 * expected, then writes 0xDEADC0DE to address 0.
 *
 * @param[in] unused  Not referenced.
 *
 * @return NULL - only reached if the store to address 0 does not end the
 *         task.
 */
static void* TaskThatCrashes(void* unused)
{
printk("Test case: Expect to see uncaught exception! ");
printk("TaskThatCrashes: Expect to see uncaught exception! ");
// Intentional write to address 0; per the message above this is expected
// to raise an exception for this task.
*(int64_t*)(0) = 0xDEADC0DE;
return NULL;
}
Expand Down
35 changes: 31 additions & 4 deletions src/usr/testcore/kernel/vmmbasetest.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* COPYRIGHT International Business Machines Corp. 2011,2014 */
/* Contributors Listed Below - COPYRIGHT 2011,2018 */
/* [+] International Business Machines Corp. */
/* */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
Expand Down Expand Up @@ -44,7 +46,7 @@ class VmmBaseTest : public CxxTest::TestSuite
{
int status;

printk("Test case: Expect to see uncaught exception! ");
printk("testNullAccess1: Expect to see uncaught exception! ");
tid_t child = task_create(readFromNULL, NULL);

if ((child != task_wait_tid(child, &status, NULL)) ||
Expand All @@ -53,7 +55,7 @@ class VmmBaseTest : public CxxTest::TestSuite
TS_FAIL("Write to NULL not caught.");
}

printk("Test case: Expect to see uncaught exception! ");
printk("testNullAccess2: Expect to see uncaught exception! ");
child = task_create(writeToNULL, NULL);
if ((child != task_wait_tid(child, &status, NULL)) ||
(status != TASK_STATUS_CRASHED))
Expand All @@ -66,7 +68,7 @@ class VmmBaseTest : public CxxTest::TestSuite
{
int status;

printk("Test case: Expect to see uncaught exception! ");
printk("testWriteToKernelCode: Expect to see uncaught exception! ");
tid_t child = task_create(writeToKernelCode, NULL);
if ((child != task_wait_tid(child, &status, NULL)) ||
(status != TASK_STATUS_CRASHED))
Expand Down Expand Up @@ -134,6 +136,21 @@ class VmmBaseTest : public CxxTest::TestSuite
}
}

void testHugeMalloc()
{
int status;

printk("testHugeMalloc: Expect to see uncaught exception! ");isync();

tid_t child = task_create(bigMalloc, NULL);

if ((child != task_wait_tid(child, &status, NULL)) ||
(status != TASK_STATUS_CRASHED))
{
TS_FAIL("testHugeMalloc> Giant malloc didn't crash.");
}
}

private:

static void* readFromNULL(void* unused)
Expand Down Expand Up @@ -180,6 +197,16 @@ class VmmBaseTest : public CxxTest::TestSuite
return NULL;
}

/**
 * @brief Task entry point that requests VMM_MEMORY_SIZE bytes and writes
 *        one byte every MEGABYTE across the buffer.
 *
 * The caller (testHugeMalloc) expects this task to end as crashed.
 * The malloc() result is intentionally not checked: if it were NULL the
 * first store would go to address 0.
 *
 * @param[in] unused  Not referenced.
 *
 * @return NULL - only reached if every store succeeded; the buffer is
 *         released first so the surviving task does not leak it.
 */
static void* bigMalloc(void* unused)
{
    uint8_t* l_buf = static_cast<uint8_t*>(malloc(VMM_MEMORY_SIZE));

    // Touch the buffer at MEGABYTE strides so the memory is actually used
    for( size_t l_off = 0; l_off < (VMM_MEMORY_SIZE); l_off += MEGABYTE )
    {
        l_buf[l_off] = static_cast<uint8_t>(l_off);
    }

    // Reaching this point means the task survived; do not keep the
    // allocation alive past the end of the task function.
    free(l_buf);
    return NULL;
}

};
msg_q_t VmmBaseTest::iv_mq = msg_q_create();

Expand Down

0 comments on commit f4a736d

Please sign in to comment.