Attempt to fix several issues #9310

Merged
merged 3 commits on Nov 21, 2020
3 changes: 3 additions & 0 deletions Utilities/BEType.h
@@ -508,4 +508,7 @@ struct fmt_unveil<se_t<T, Se, Align>, void>
}
};

+static_assert(be_t<u16>(1) + be_t<u32>(2) + be_t<u64>(3) == 6);
+static_assert(le_t<u16>(1) + le_t<u32>(2) + le_t<u64>(3) == 6);

#endif // BETYPE_H_GUARD
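A quick reading of the new asserts: they only compile if be_t/le_t construction and the conversion used by operator+ are constexpr all the way down, so they double as a compile-time regression test for the endian wrappers. A minimal self-contained sketch of the same idea, with a hypothetical be_like wrapper standing in for rpcs3's be_t:

#include <cstdint>

// Hypothetical, simplified stand-in for rpcs3's be_t: the value is stored
// byte-swapped, swapped back on read, and everything is constexpr so the
// assert below can evaluate at compile time.
template <typename T>
struct be_like
{
	T m_data; // stored in reversed byte order

	static constexpr T swap(T v)
	{
		T r{};
		for (unsigned i = 0; i < sizeof(T); i++)
			r = static_cast<T>((r << 8) | ((v >> (i * 8)) & 0xff));
		return r;
	}

	constexpr be_like(T v) : m_data(swap(v)) {}
	constexpr operator T() const { return swap(m_data); }
};

// Same shape as the assert added above
static_assert(be_like<std::uint16_t>(1) + be_like<std::uint32_t>(2) + be_like<std::uint64_t>(3) == 6);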
152 changes: 86 additions & 66 deletions Utilities/Thread.cpp
@@ -88,6 +88,7 @@ thread_local u64 g_tls_fault_rsx = 0;
thread_local u64 g_tls_fault_spu = 0;
thread_local u64 g_tls_wait_time = 0;
thread_local u64 g_tls_wait_fail = 0;
+thread_local bool g_tls_access_violation_recovered = false;
extern thread_local std::string(*g_tls_log_prefix)();

template <>
@@ -1367,13 +1368,11 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) noexcept
return true;
}

-thread_local bool access_violation_recovered = false;
-
// Hack: allocate memory in case the emulator is stopping
const auto hack_alloc = [&]()
{
// If failed the value remains true and std::terminate should be called
-access_violation_recovered = true;
+g_tls_access_violation_recovered = true;

const auto area = vm::reserve_map(vm::any, addr & -0x10000, 0x10000);

@@ -1525,7 +1524,7 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) noexcept

if (cpu->id_type() != 1)
{
-if (!access_violation_recovered)
+if (!g_tls_access_violation_recovered)
{
vm_log.notice("\n%s", cpu->dump_all());
vm_log.error("Access violation %s location 0x%x (%s) [type=u%u]", is_writing ? "writing" : "reading", addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory", d_size * 8);
@@ -1556,14 +1555,14 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) noexcept

Emu.Pause();

-if (cpu && !access_violation_recovered)
+if (cpu && !g_tls_access_violation_recovered)
{
vm_log.notice("\n%s", cpu->dump_all());
}

// Note: a thread may access violate more than once after hack_alloc recovery
// Do not log any further access violations in this case.
-if (!access_violation_recovered)
+if (!g_tls_access_violation_recovered)
{
vm_log.fatal("Access violation %s location 0x%x (%s) [type=u%u]", is_writing ? "writing" : "reading", addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory", d_size * 8);
}
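The rename above is not cosmetic: a thread_local declared inside handle_access_violation is invisible to other functions, so the recovery flag could never be cleared when an OS thread is recycled through the thread pool. Promoting it to a file-scope TLS variable lets finalize() reset it (see the g_tls_access_violation_recovered = false line further down). A minimal sketch of the pattern, with hypothetical names:

// Hypothetical sketch: per-thread state at namespace scope, so the pool's
// recycle path can reset it before the next logical thread reuses this
// OS thread.
thread_local bool g_recovered = false;

bool handle_fault()
{
	g_recovered = true; // set on first recovery
	return true;
}

void on_thread_recycled()
{
	g_recovered = false; // reset leftover state from the previous thread
}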
@@ -1850,7 +1849,7 @@ static atomic_t<u128, 64> s_thread_bits{0};

static atomic_t<thread_base**> s_thread_pool[128]{};

-void thread_base::start(native_entry entry, void(*trampoline)())
+void thread_base::start(native_entry entry)
{
for (u128 bits = s_thread_bits.load(); bits; bits &= bits - 1)
{
@@ -1869,16 +1868,13 @@ void thread_base::start(native_entry entry, void(*trampoline)())
}

// Send "this" and entry point
-m_thread = reinterpret_cast<u64>(trampoline);
+const u64 entry_val = reinterpret_cast<u64>(entry);
+m_thread = entry_val;
atomic_storage<thread_base*>::release(*tls, this);
s_thread_pool[pos].notify_all();

// Wait for actual "m_thread" in return
-while (m_thread == reinterpret_cast<u64>(trampoline))
-{
-busy_wait(300);
-}
-
+m_thread.wait(entry_val);
return;
}
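The loop removed above polled m_thread with busy_wait(300) until the pooled thread overwrote it; the replacement m_thread.wait(entry_val) blocks until the atomic no longer holds entry_val, in the style of C++20 std::atomic::wait. A minimal sketch of the same handoff using the standard type (names hypothetical):

#include <atomic>
#include <cstdint>

std::atomic<std::uint64_t> slot{0};

// Submitter: publish the entry value, then sleep until the pooled thread
// replaces it with its native handle.
void submit(std::uint64_t entry_val)
{
	slot.store(entry_val, std::memory_order_release);
	slot.notify_all();
	slot.wait(entry_val); // returns once slot != entry_val
}

// Pooled thread: wait for work, then swap in the handle and wake the submitter.
void consume(std::uint64_t handle)
{
	slot.wait(0); // blocks while slot == 0
	slot.store(handle);
	slot.notify_all();
}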

@@ -1892,6 +1888,10 @@

void thread_base::initialize(void (*error_cb)())
{
+#ifndef _WIN32
+m_thread.release(reinterpret_cast<u64>(pthread_self()));
+#endif
+
// Initialize TLS variables
thread_ctrl::g_tls_this_thread = this;

@@ -2012,9 +2012,6 @@ u64 thread_base::finalize(thread_state result_state) noexcept

atomic_wait_engine::set_wait_callback(nullptr);

-const u64 _self = m_thread;
-m_thread.release(0);
-
// Return true if need to delete thread object (no)
const bool ok = 0 == (3 & ~m_sync.fetch_op([&](u64& v)
{
@@ -2026,48 +2023,55 @@
m_sync.notify_all(2);

// No detached thread supported atm
-return _self;
+return m_thread;
}

-void thread_base::finalize(u64 _self) noexcept
+u64 thread_base::finalize(u64 _self) noexcept
{
g_tls_fault_all = 0;
g_tls_fault_rsx = 0;
g_tls_fault_spu = 0;
g_tls_wait_time = 0;
g_tls_wait_fail = 0;
+g_tls_access_violation_recovered = false;

atomic_wait_engine::set_wait_callback(nullptr);
g_tls_log_prefix = []() -> std::string { return {}; };
thread_ctrl::g_tls_this_thread = nullptr;

if (!_self)
{
-return;
+return 0;
}

-// Try to add self to thread pool
-const auto [bits, ok] = s_thread_bits.fetch_op([](u128& bits)
+set_name("..pool");
+
+u32 pos = -1;
+
+while (true)
{
-if (~bits) [[likely]]
+const auto [bits, ok] = s_thread_bits.fetch_op([](u128& bits)
+{
-// Set lowest clear bit
-bits |= bits + 1;
-return true;
-}
+if (~bits) [[likely]]
+{
+// Set lowest clear bit
+bits |= bits + 1;
+return true;
+}

-return false;
-});
+return false;
+});

-if (!ok)
-{
-#ifdef _WIN32
-_endthread();
-#else
-pthread_detach(reinterpret_cast<pthread_t>(_self));
-pthread_exit(0);
-#endif
-return;
-}
+if (ok) [[likely]]
+{
+pos = utils::ctz128(~bits);
+break;
+}

-set_name("..pool");
+s_thread_bits.wait(bits);
+}

-// Obtain id from atomic op
-const u32 pos = utils::ctz128(~bits);
const auto tls = &thread_ctrl::g_tls_this_thread;
s_thread_pool[pos] = tls;
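The claim loop above rests on two bit tricks: bits | (bits + 1) sets exactly the lowest clear bit of the mask, and ctz128(~bits) recovers that bit's index from the pre-update value; when all 128 slots are taken, the thread now blocks on s_thread_bits.wait(bits) instead of detaching and exiting as the removed branch did. A 64-bit sketch of the same loop in plain C++20 (hypothetical names):

#include <atomic>
#include <bit>
#include <cstdint>

std::atomic<std::uint64_t> pool_bits{0};

unsigned claim_slot()
{
	while (true)
	{
		std::uint64_t bits = pool_bits.load();

		if (bits != ~0ull)
		{
			// bits | (bits + 1) sets exactly the lowest clear bit
			if (pool_bits.compare_exchange_weak(bits, bits | (bits + 1)))
				return std::countr_zero(~bits); // index of the claimed bit
		}
		else
		{
			pool_bits.wait(bits); // full: sleep until a slot is released
		}
	}
}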

@@ -2082,30 +2086,47 @@ void thread_base::finalize(u64 _self) noexcept
val &= ~(u128(1) << pos);
});

+s_thread_bits.notify_one();
+
// Restore thread id
const auto _this = atomic_storage<thread_base*>::load(*tls);
const auto entry = _this->m_thread.exchange(_self);
verify(HERE), entry != _self;
_this->m_thread.notify_one();

-// Hack return address to avoid tail call
-#ifdef _MSC_VER
-*static_cast<u64*>(_AddressOfReturnAddress()) = entry;
-#else
-static_cast<u64*>(__builtin_frame_address(0))[1] = entry;
-#endif
-//reinterpret_cast<native_entry>(entry)(_this);
+// Return new entry
+return entry;
}

-void (*thread_base::make_trampoline(native_entry entry))()
+thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base* _base))
{
-return build_function_asm<void(*)()>([&](asmjit::X86Assembler& c, auto& args)
+return build_function_asm<native_entry>([&](asmjit::X86Assembler& c, auto& args)
{
using namespace asmjit;

-// Revert effect of ret instruction (fix stack alignment)
-c.mov(x86::rax, imm_ptr(entry));
-c.sub(x86::rsp, 8);
-c.jmp(x86::rax);
+Label _ret = c.newLabel();
+c.push(x86::rbp);
+c.sub(x86::rsp, 0x20);
+
+// Call entry point (TODO: support for detached threads missing?)
+c.call(imm_ptr(entry));
+
+// Call finalize, return if zero
+c.mov(args[0], x86::rax);
+c.call(imm_ptr<u64(*)(u64)>(finalize));
+c.test(x86::rax, x86::rax);
+c.jz(_ret);
+
+// Otherwise, call it as an entry point with first arg = new current thread
+c.mov(x86::rbp, x86::rax);
+c.call(imm_ptr(thread_ctrl::get_current));
+c.mov(args[0], x86::rax);
+c.add(x86::rsp, 0x28);
+c.jmp(x86::rbp);
+
+c.bind(_ret);
+c.add(x86::rsp, 0x28);
+c.ret();
});
}
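Instead of the removed return-address patching (which rewrote the caller's frame so that "returning" from finalize landed in the next thread's entry), the trampoline now drives the lifecycle explicitly: run the entry, pass its result to finalize, then either return (the OS thread really ends) or tail-jump to the entry of the next logical thread recycled onto it. Roughly equivalent C++, with stub declarations standing in for the real rpcs3 functions:

#include <cstdint>

struct thread_base; // opaque here

// Stubs for the sketch (the real ones live in thread_base/thread_ctrl):
using entry_fn = std::uint64_t (*)(thread_base*);
std::uint64_t pool_finalize(std::uint64_t handle); // thread_base::finalize(u64)
thread_base* current_thread();                     // thread_ctrl::get_current()

// The asmjit version jmp's to the next entry instead of looping, so thread
// reuse never grows the native stack; the control flow is the same.
void trampoline_loop(thread_base* base, entry_fn entry)
{
	while (true)
	{
		const std::uint64_t handle = entry(base);         // run the thread body
		const std::uint64_t next = pool_finalize(handle); // park in pool, get next entry
		if (!next)
			return; // zero: no reuse, let the OS thread end
		entry = reinterpret_cast<entry_fn>(next);
		base = current_thread(); // "this" of the recycled thread
	}
}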

@@ -2193,12 +2214,13 @@ thread_base::thread_base(std::string_view name)

thread_base::~thread_base()
{
-if (u64 handle = m_thread.exchange(0))
+// Only cleanup on errored status
+if ((m_sync & 3) == 2)
{
#ifdef _WIN32
-CloseHandle(reinterpret_cast<HANDLE>(handle));
+CloseHandle(reinterpret_cast<HANDLE>(m_thread.load()));
#else
-pthread_detach(reinterpret_cast<pthread_t>(handle));
+pthread_join(reinterpret_cast<pthread_t>(m_thread.load()), nullptr);
#endif
}
}
@@ -2255,13 +2277,15 @@ u64 thread_base::get_native_id() const

u64 thread_base::get_cycles()
{
-u64 cycles;
+u64 cycles = 0;

+const u64 handle = m_thread;

#ifdef _WIN32
-if (QueryThreadCycleTime(reinterpret_cast<HANDLE>(m_thread.load()), &cycles))
+if (QueryThreadCycleTime(reinterpret_cast<HANDLE>(handle), &cycles))
{
#elif __APPLE__
-mach_port_name_t port = pthread_mach_thread_np(reinterpret_cast<pthread_t>(m_thread.load()));
+mach_port_name_t port = pthread_mach_thread_np(reinterpret_cast<pthread_t>(handle));
mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
thread_basic_info_data_t info;
kern_return_t ret = thread_info(port, THREAD_BASIC_INFO, reinterpret_cast<thread_info_t>(&info), &count);
@@ -2272,7 +2296,7 @@ u64 thread_base::get_cycles()
#else
clockid_t _clock;
struct timespec thread_time;
-if (!pthread_getcpuclockid(reinterpret_cast<pthread_t>(m_thread.load()), &_clock) && !clock_gettime(_clock, &thread_time))
+if (!pthread_getcpuclockid(reinterpret_cast<pthread_t>(handle), &_clock) && !clock_gettime(_clock, &thread_time))
{
cycles = static_cast<u64>(thread_time.tv_sec) * 1'000'000'000 + thread_time.tv_nsec;
#endif
@@ -2317,19 +2341,15 @@ void thread_ctrl::emergency_exit(std::string_view reason)

if (!_self)
{
+// Unused, detached thread support remnant
delete _this;
}

thread_base::finalize(0);

#ifdef _WIN32
-_endthread();
+_endthreadex(0);
#else
-if (_self)
-{
-pthread_detach(reinterpret_cast<pthread_t>(_self));
-}
-
pthread_exit(0);
#endif
}
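Context for the Win32 change above: _endthread closes the calling thread's handle automatically, while _endthreadex leaves it open for the owner to close, which matches the new join/CloseHandle cleanup in ~thread_base. A minimal illustration of that ownership split (hypothetical standalone example, not rpcs3 code):

#include <windows.h>
#include <process.h>

// The spawning side owns the handle and closes it after the thread ends,
// so the thread must exit with _endthreadex (handle stays open) rather
// than _endthread (handle auto-closed).
unsigned __stdcall worker(void*)
{
	_endthreadex(0); // handle stays valid for the owner
	return 0;        // not reached
}

int main()
{
	HANDLE h = reinterpret_cast<HANDLE>(_beginthreadex(nullptr, 0, worker, nullptr, 0, nullptr));
	WaitForSingleObject(h, INFINITE); // analogous to the join in ~thread_base
	CloseHandle(h);                   // owner releases the handle
}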