Skip to content

Commit

Permalink
fix #6214
Browse files Browse the repository at this point in the history
  • Loading branch information
rt committed Apr 30, 2019
1 parent 04345f2 commit 17688ff
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 108 deletions.
2 changes: 1 addition & 1 deletion rts/System/Platform/CrashHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace CrashHandler {
* Imho this is a better solution than adding yet another optional parameter to the Stacktrace interface because
* the parameter is specific to the needs of one platform.
*/
void SuspendedStacktrace(Threading::ThreadControls* ctls, const std::string& threadName);
void SuspendedStacktrace(Threading::ThreadControls* ctls, const char* threadName);
#else
bool InitImageHlpDll();
#endif
Expand Down
6 changes: 3 additions & 3 deletions rts/System/Platform/Linux/CrashHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -841,16 +841,16 @@ namespace CrashHandler
* Since the thread to be traced may be running, it requires a ThreadControls object in order to suspend/resume the thread.
* @brief SuspendedStacktrace
*/
void SuspendedStacktrace(Threading::ThreadControls* ctls, const std::string& threadName)
void SuspendedStacktrace(Threading::ThreadControls* ctls, const char* threadName)
{
#if !(DEDICATED || UNIT_TEST)
Watchdog::ClearTimer();
#endif
assert(ctls != nullptr);
assert(ctls->handle != 0);
assert(!threadName.empty());
assert(threadName[0] != 0);

LOG_L(L_WARNING, "Suspended-thread Stacktrace (%s) for Spring %s:", threadName.c_str(), (SpringVersion::GetFull()).c_str());
LOG_L(L_WARNING, "Suspended-thread Stacktrace (%s) for Spring %s:", threadName, (SpringVersion::GetFull()).c_str());
LOG_L(L_DEBUG, "[%s][1]", __func__);

StackTrace stacktrace;
Expand Down
85 changes: 37 additions & 48 deletions rts/System/Platform/Linux/ThreadSupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ static LinuxThreadState GetLinuxThreadState(int tid)
std::fstream sfile;
sfile.open(filename, std::fstream::in);
if (sfile.fail()) {
LOG_L(L_WARNING, "GetLinuxThreadState could not query %s", filename);
LOG_L(L_WARNING, "[%s] could not query %s", __func__, filename);
sfile.close();
return LTS_UNKNOWN;
}
Expand All @@ -85,34 +85,30 @@ static LinuxThreadState GetLinuxThreadState(int tid)

static void ThreadSIGUSR1Handler(int signum, siginfo_t* info, void* pCtx)
{
int err = 0;

LOG_L(L_DEBUG, "ThreadSIGUSR1Handler[1]");

LOG_L(L_DEBUG, "[%s][1]", __func__);

// Fill in ucontext_t structure before locking, this allows stack walking...
const int err = getcontext(&(localThreadControls->ucontext));

err = getcontext(&(threadCtls->ucontext));
if (err != 0) {
LOG_L(L_ERROR, "Couldn't get thread context within suspend signal handler: %s", strerror(err));
LOG_L(L_ERROR, "[%s] couldn't get thread context within suspend signal handler: %s", __func__, strerror(err));
return;
}

// Change the "running" flag to false. Note that we don't own a lock on the suspend mutex, but in order to get here,
// it had to have been locked by some other thread.
threadCtls->running.store(false);
localThreadControls->running.store(false);

LOG_L(L_DEBUG, "ThreadSIGUSR1Handler[2]");
LOG_L(L_DEBUG, "[%s][2]", __func__);

// Wait on the mutex. This should block the thread.
{
threadCtls->mutSuspend.lock();
threadCtls->running.store(true);
threadCtls->mutSuspend.unlock();
localThreadControls->mutSuspend.lock();
localThreadControls->running.store(true);
localThreadControls->mutSuspend.unlock();
}

LOG_L(L_DEBUG, "ThreadSIGUSR1Handler[3]");

LOG_L(L_DEBUG, "[%s][3]", __func__);
}


Expand All @@ -127,7 +123,7 @@ static bool SetThreadSignalHandler()
err = pthread_sigmask(SIG_UNBLOCK, &sigSet, nullptr);

if (err != 0) {
LOG_L(L_FATAL, "Error while setting new pthread's signal mask: %s", strerror(err));
LOG_L(L_FATAL, "[%s] error while setting new pthread's signal mask: %s", __func__, strerror(err));
return false;
}

Expand All @@ -137,39 +133,35 @@ static bool SetThreadSignalHandler()
sa.sa_flags |= SA_SIGINFO;

if (sigaction(SIGUSR1, &sa, nullptr)) {
LOG_L(L_FATAL,"Error while installing pthread SIGUSR1 handler.");
LOG_L(L_FATAL, "[%s] error while installing pthread SIGUSR1 handler", __func__);
return false;
}

return true;
}


void SetCurrentThreadControls(bool isLoadThread)
void SetupCurrentThreadControls(std::shared_ptr<ThreadControls>& threadCtls)
{
assert(!Threading::IsWatchDogThread());

#ifndef WIN32
if (isLoadThread) {
if (threadCtls.get() != nullptr) {
// do nothing if Load is actually Main (LoadingMT=0 case)
if (GetCurrentThreadControls() != nullptr) {
if (Threading::IsGameLoadThread())
return;
}
}

if (threadCtls != nullptr) {
// old shared_ptr will be deleted by the reset below
LOG_L(L_WARNING, "Setting a ThreadControls object on a thread that already has such an object registered.");
LOG_L(L_WARNING, "[%s] thread already has ThreadControls installed", __func__);
} else {
// Installing new ThreadControls object, so install signal handler also
if (!SetThreadSignalHandler()) {
// new ThreadControls object, so install SIGUSR1 signal handler also
if (!SetThreadSignalHandler())
return;
}
}

{
threadCtls.reset(new Threading::ThreadControls());

assert(threadCtls != nullptr);

threadCtls->handle = GetCurrentThread();
threadCtls->thread_id = gettid();
threadCtls->running.store(true);
Expand All @@ -184,22 +176,20 @@ void SetCurrentThreadControls(bool isLoadThread)
*/
void ThreadStart(
std::function<void()> taskFunc,
std::shared_ptr<ThreadControls>* ppCtlsReturn,
std::shared_ptr<ThreadControls>* threadCtls,
ThreadControls* tempCtls
) {
// Install the SIGUSR1 handler
SetCurrentThreadControls(false);

assert(threadCtls != nullptr);
// install the SIGUSR1 handler
SetupCurrentThreadControls(localThreadControls);

if (ppCtlsReturn != nullptr)
*ppCtlsReturn = threadCtls;
if (threadCtls != nullptr)
*threadCtls = localThreadControls;

{
// Lock the thread object so that users can't suspend/resume yet.
tempCtls->mutSuspend.lock();

LOG_L(L_DEBUG, "ThreadStart(): New thread's handle is %.4lx", threadCtls->handle);
LOG_L(L_DEBUG, "[%s] new thread handle %.4lx", __func__, localThreadControls->handle);

// We are fully initialized, so notify the condition variable. The
// thread's parent will unblock in whatever function created this
Expand All @@ -214,42 +204,41 @@ void ThreadStart(
taskFunc();

// Finish up: change the thread's running state to false.
threadCtls->mutSuspend.lock();
threadCtls->running = false;
threadCtls->mutSuspend.unlock();
localThreadControls->mutSuspend.lock();
localThreadControls->running = false;
localThreadControls->mutSuspend.unlock();
}



SuspendResult ThreadControls::Suspend()
{
int err = 0;

// Return an error if the running flag is false.
if (!running) {
LOG_L(L_ERROR, "Cannot suspend if a thread's running flag is set to false. Refusing to suspend using pthread_kill.");
LOG_L(L_ERROR, "[ThreadControls::%s] cannot suspend if a thread's running flag is set to false, refusing to use pthread_kill", __func__);
return Threading::THREADERR_NOT_RUNNING;
}

mutSuspend.lock();

LOG_L(L_DEBUG, "Sending SIGUSR1 to 0x%lx", handle);
LOG_L(L_DEBUG, "[ThreadControls::%s] sending SIGUSR1 to 0x%lx", __func__, handle);

// Send signal to thread to trigger its handler
err = pthread_kill(handle, SIGUSR1);
const int err = pthread_kill(handle, SIGUSR1);

if (err != 0) {
LOG_L(L_ERROR, "Error while trying to send signal to suspend thread: %s", strerror(err));
LOG_L(L_ERROR, "[ThreadControls::%s] error while trying to send signal to suspend thread: %s", __func__, strerror(err));
return Threading::THREADERR_MISC;
}

// Before leaving this function, we need some kind of guarantee that the stalled thread is suspended, so spinwait until it is guaranteed.
// FIXME: this sort of spin-waiting inside the watchdog loop could be avoided by creating another worker thread
// inside SuspendedStacktrace itself to do the work of checking that the stalled thread has been suspended and performing the trace there.
LinuxThreadState tstate;
const int max_attempts = 40; // 40 attempts * 0.025s = 1 sec max.
constexpr int max_attempts = 40; // 40 attempts * 0.025s = 1 sec max.
for (int a = 0; a < max_attempts; a++) {
tstate = GetLinuxThreadState(thread_id);
if (tstate == LTS_SLEEP) break;
if ((tstate = GetLinuxThreadState(thread_id)) == LTS_SLEEP)
break;
}

return Threading::THREADERR_NONE;
Expand Down
53 changes: 23 additions & 30 deletions rts/System/Platform/Threading.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,11 @@


namespace Threading {
enum {
THREAD_IDX_MAIN = 0,
THREAD_IDX_LOAD = 1,
THREAD_IDX_SND = 2,
THREAD_IDX_VFSI = 3,
THREAD_IDX_WDOG = 4,
THREAD_IDX_LAST = 5,
};
thread_local std::shared_ptr<ThreadControls> localThreadControls;

static NativeThreadId nativeThreadIDs[THREAD_IDX_LAST] = {};
static Error threadError;

thread_local std::shared_ptr<Threading::ThreadControls> threadCtls;

#if defined(__APPLE__) || defined(__FreeBSD__)
#elif defined(WIN32)
static DWORD_PTR cpusSystem = 0;
Expand Down Expand Up @@ -277,27 +268,19 @@ namespace Threading {
}



#ifndef WIN32
std::shared_ptr<ThreadControls> GetCurrentThreadControls()
{
// If there is no object registered, need to return an "empty" shared_ptr
if (threadCtls == nullptr)
return std::shared_ptr<ThreadControls>();

return threadCtls;
}
std::shared_ptr<ThreadControls> GetCurrentThreadControls() { return localThreadControls; }
#endif


spring::thread CreateNewThread(std::function<void()> taskFunc, std::shared_ptr<Threading::ThreadControls>* ppCtlsReturn)
spring::thread CreateNewThread(std::function<void()> taskFunc, std::shared_ptr<Threading::ThreadControls>* threadCtls)
{
#ifndef WIN32
// only used as locking mechanism, not installed by thread
Threading::ThreadControls tempCtls;

std::unique_lock<spring::mutex> lock(tempCtls.mutSuspend);
spring::thread localthread(std::bind(Threading::ThreadStart, taskFunc, ppCtlsReturn, &tempCtls));
spring::thread localthread(std::bind(Threading::ThreadStart, taskFunc, threadCtls, &tempCtls));

// wait so that we know the thread is running and fully initialized before returning
tempCtls.condInitialized.wait(lock);
Expand All @@ -310,25 +293,35 @@ namespace Threading {



static void SetThread(unsigned int threadIndex, bool setControls, bool isLoadThread) {
static void SetThreadID(unsigned int threadIndex) {
// NOTE:
// the IDs of LOAD and SND always have to be set unconditionally since
// those two threads are joined and respawned when reloading, KISS here
// (while other threads never call Set*Thread more than once, making the
// is-cached flags redundant anyway)
nativeThreadIDs[threadIndex] = Threading::GetCurrentThreadId();

if (!setControls)
return;
switch (threadIndex) {
case THREAD_IDX_LOAD: {
if (IsMainThread())
return;
} break;
#ifndef WIN32
// both heartBeatThread and soundThread make use of CreateNewThread -> ThreadStart;
// other threads under the eye of the watchdog have their control structures set up here
case THREAD_IDX_SND : { return; } break;
#endif
case THREAD_IDX_WDOG: { return; } break;
}

SetCurrentThreadControls(isLoadThread);
SetupCurrentThreadControls(localThreadControls);
}

void SetMainThread() { SetThread(THREAD_IDX_MAIN, true, false); }
void SetGameLoadThread() { SetThread(THREAD_IDX_LOAD, true, true); }
void SetAudioThread() { SetThread(THREAD_IDX_SND , true, false); }
void SetFileSysThread() { SetThread(THREAD_IDX_VFSI, true, false); }
void SetWatchDogThread() { SetThread(THREAD_IDX_WDOG, false, false); }
void SetMainThread() { SetThreadID(THREAD_IDX_MAIN); }
void SetGameLoadThread() { SetThreadID(THREAD_IDX_LOAD); }
void SetAudioThread() { SetThreadID(THREAD_IDX_SND ); }
void SetFileSysThread() { SetThreadID(THREAD_IDX_VFSI); }
void SetWatchDogThread() { SetThreadID(THREAD_IDX_WDOG); }

bool IsMainThread(NativeThreadId threadID) { return NativeThreadIdsEqual(threadID, nativeThreadIDs[THREAD_IDX_MAIN]); }
bool IsMainThread( ) { return IsMainThread(Threading::GetCurrentThreadId()); }
Expand Down
Loading

0 comments on commit 17688ff

Please sign in to comment.