Gran Turismo Improvements #13917

Merged
merged 15 commits on Jun 2, 2023
64 changes: 35 additions & 29 deletions Utilities/Thread.cpp
@@ -4,6 +4,7 @@
#include "Emu/Cell/PPUThread.h"
#include "Emu/Cell/lv2/sys_mmapper.h"
#include "Emu/Cell/lv2/sys_event.h"
#include "Emu/Cell/lv2/sys_process.h"
#include "Emu/RSX/RSXThread.h"
#include "Thread.h"
#include "Utilities/JIT.h"
@@ -1514,7 +1515,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noexcept
}
}

if (pf_port_id)
if (auto pf_port = idm::get<lv2_obj, lv2_event_port>(pf_port_id); pf_port && pf_port->queue)
{
// We notify the game that a page fault occurred so it can rectify it.
// Note, for data3: if the memory were readable AND we still got a page fault, it must be due to a write violation, since reads are allowed.
@@ -1552,20 +1553,33 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noexcept
}
}

// Deschedule
if (cpu->id_type() == 1)
{
lv2_obj::sleep(*cpu);
}

// Now, place the page fault event onto the table so that other functions [sys_mmapper_free_address, page fault recovery funcs, etc.]
// know that this thread has page faulted, and where.

auto& pf_events = g_fxo->get<page_fault_event_entries>();

// De-schedule
if (cpu->id_type() == 1)
{
cpu->state -= cpu_flag::signal; // Cannot use check_state here; the signal flag must be removed if it exists
lv2_obj::sleep(*cpu);
}

auto send_event = [&]() -> error_code
{
lv2_obj::notify_all_t notify_later{};

std::lock_guard pf_lock(pf_events.pf_mutex);

if (auto error = pf_port->queue->send(pf_port->name ? pf_port->name : ((u64{process_getpid() + 0u} << 32) | u64{pf_port_id}), data1, data2, data3))
{
return error;
}

pf_events.events.emplace(cpu, addr);
}
return {};
};

sig_log.warning("Page_fault %s location 0x%x because of %s memory", is_writing ? "writing" : "reading",
addr, data3 == SYS_MEMORY_PAGE_FAULT_CAUSE_READ_ONLY ? "writing read-only" : "using unmapped");
@@ -1578,13 +1592,12 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noexcept
}
}

error_code sending_error = sys_event_port_send(pf_port_id, data1, data2, data3);
error_code sending_error = not_an_error(CELL_EBUSY);

// If we fail due to being busy, wait a bit and try again.
while (static_cast<u32>(sending_error) == CELL_EBUSY)
for (; static_cast<u32>(sending_error) == CELL_EBUSY; thread_ctrl::wait_for(1000))
{
thread_ctrl::wait_for(1000);
sending_error = sys_event_port_send(pf_port_id, data1, data2, data3);
sending_error = send_event();

if (cpu->is_stopped())
{
@@ -2647,26 +2660,24 @@ void thread_base::exec()
while (shared_ptr<thread_future> head = m_taskq.exchange(null_ptr))
{
// TODO: check if adapting reverse algorithm is feasible here
shared_ptr<thread_future>* prev{};
thread_future* prev_head{head.get()};

for (auto ptr = head.get(); ptr; ptr = ptr->next.get())
for (thread_future* prev{};;)
{
utils::prefetch_exec(ptr->exec.load());

ptr->prev = prev;
utils::prefetch_exec(prev_head->exec.load());

if (ptr->next)
if (auto next = prev_head->next.get())
{
prev = &ptr->next;
prev = std::exchange(prev_head, next);
prev_head->prev = prev;
}
else
{
break;
}
}

if (!prev)
{
prev = &head;
}

for (auto ptr = prev->get(); ptr; ptr = ptr->prev->get())
for (auto ptr = prev_head; ptr; ptr = ptr->prev)
{
if (auto task = ptr->exec.load()) [[likely]]
{
Expand All @@ -2690,11 +2701,6 @@ void thread_base::exec()
// Partial cleanup
ptr->next.reset();
}

if (!ptr->prev)
{
break;
}
}

if (!m_taskq) [[likely]]
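
For context, here is a minimal sketch of the send-and-record pattern the new send_event lambda above uses: the event-port send and the page-fault table insertion happen under one lock, so recovery code (sys_mmapper_free_address and friends) never observes a sent event without a matching table entry. fault_table and send_and_record are illustrative stand-ins, not RPCS3 API:

    #include <map>
    #include <mutex>

    struct fault_table
    {
        std::mutex mtx;
        std::map<unsigned, unsigned> events; // thread id -> faulting address

        // Returns 0 on success, or the nonzero error from send(); nothing is
        // recorded on failure, so the caller can back off and retry, as the
        // loop above does with thread_ctrl::wait_for(1000).
        template <typename Send>
        int send_and_record(Send&& send, unsigned thread_id, unsigned addr)
        {
            std::lock_guard lock(mtx);

            if (int err = send())
                return err;

            events.emplace(thread_id, addr);
            return 0;
        }
    };
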
2 changes: 1 addition & 1 deletion Utilities/Thread.h
@@ -95,7 +95,7 @@ class thread_future

shared_ptr<thread_future> next{};

shared_ptr<thread_future>* prev{};
thread_future* prev{};

protected:
atomic_t<void(*)(thread_base*, thread_future*)> exec{};
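
The switch from shared_ptr<thread_future>* to a raw thread_future* back-pointer is what the rewritten loop in thread_base::exec() above relies on: a forward pass threads prev links while finding the tail, then a backward pass runs the tasks oldest-first. A self-contained sketch of that traversal over plain nodes (node, run, and drain are stand-in names):

    #include <utility>

    struct node
    {
        node* next{};
        node* prev{};     // raw back-pointer, threaded during the forward pass
        void (*run)(){};
    };

    inline void drain(node* head)
    {
        node* tail = head; // head->prev stays null and ends the backward walk

        while (node* next = tail->next)
        {
            node* prev = std::exchange(tail, next); // advance, keep old node
            tail->prev = prev;                      // link back
        }

        // The queue is pushed at the head, so walking back from the tail
        // executes tasks in submission (FIFO) order.
        for (node* p = tail; p; p = p->prev)
        {
            if (p->run)
                p->run();
        }
    }
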
29 changes: 24 additions & 5 deletions rpcs3/Emu/Cell/PPUThread.cpp
@@ -1357,7 +1357,7 @@ std::string ppu_thread::dump_misc() const
fmt::append(ret, " (LV2 suspended)\n");
}

fmt::append(ret, "Priority: %d\n", +prio);
fmt::append(ret, "Priority: %d\n", prio.load().prio);
fmt::append(ret, "Stack: 0x%x..0x%x\n", stack_addr, stack_addr + stack_size - 1);
fmt::append(ret, "Joiner: %s\n", joiner.load());

@@ -1660,9 +1660,8 @@ ppu_thread::~ppu_thread()
perf_log.notice("Perf stats for instructions: total %u", exec_bytes / 4);
}

ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 prio, int detached)
ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 _prio, int detached)
: cpu_thread(idm::last_id())
, prio(prio)
, stack_size(param.stack_size)
, stack_addr(param.stack_addr)
, joiner(detached != 0 ? ppu_join_status::detached : ppu_join_status::joinable)
@@ -1671,6 +1670,8 @@ ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 _prio, int detached)
, is_interrupt_thread(detached < 0)
, ppu_tname(make_single<std::string>(name))
{
prio.raw().prio = _prio;

gpr[1] = stack_addr + stack_size - ppu_stack_start_offset;

gpr[13] = param.tls_addr;
@@ -1732,7 +1733,25 @@ bool ppu_thread::savable() const

void ppu_thread::serialize_common(utils::serial& ar)
{
ar(gpr, fpr, cr, fpscr.bits, lr, ctr, vrsave, cia, xer, sat, nj, prio, optional_savestate_state, vr);
[[maybe_unused]] const s32 version = GET_OR_USE_SERIALIZATION_VERSION(ar.is_writing(), ppu);

ar(gpr, fpr, cr, fpscr.bits, lr, ctr, vrsave, cia, xer, sat, nj);

if (ar.is_writing())
{
ar(prio.load().all);
}
else if (version < 2)
{
prio.raw().all = 0;
prio.raw().prio = ar.operator s32();
}
else
{
ar(prio.raw().all);
}

ar(optional_savestate_state, vr);

if (optional_savestate_state->data.empty())
{
@@ -3754,7 +3773,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented");

// Write version, hash, CPU, settings
fmt::append(obj_name, "v5-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
}

if (Emu.IsStopped())
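
A sketch of the versioned load path serialize_common now implements: savestates older than version 2 stored the priority as a bare s32, while newer ones round-trip the packed 64-bit prio+order word. Reader, prio_word, and the explicit 13-bit mask are simplified stand-ins for utils::serial and ppu_prio_t:

    #include <cstdint>

    struct prio_word { std::uint64_t all; };

    template <typename Reader>
    prio_word load_prio(Reader& ar, int version)
    {
        prio_word p{};

        if (version < 2)
        {
            // Legacy format: only a bare s32 priority was saved; the
            // enqueue-order bits simply restart at zero after loading.
            std::int32_t legacy = 0;
            ar(legacy);
            p.all = static_cast<std::uint32_t>(legacy) & 0x1fff;
        }
        else
        {
            ar(p.all); // new format: the whole packed word round-trips
        }

        return p;
    }
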
10 changes: 9 additions & 1 deletion rpcs3/Emu/Cell/PPUThread.h
@@ -3,6 +3,7 @@
#include "../CPU/CPUThread.h"
#include "../Memory/vm_ptr.h"
#include "Utilities/lockless.h"
#include "Utilities/BitField.h"

#include "util/logs.hpp"
#include "util/v128.hpp"
@@ -253,7 +254,14 @@ class ppu_thread : public cpu_thread
alignas(64) std::byte rdata[128]{}; // Reservation data
bool use_full_rdata{};

atomic_t<s32> prio{0}; // Thread priority (0..3071)
union ppu_prio_t
{
u64 all;
bf_t<s64, 0, 13> prio; // Thread priority (0..3071), low 13 bits
bf_t<s64, 13, 51> order; // Thread enqueue order, upper 51 bits
};

atomic_t<ppu_prio_t> prio{};
const u32 stack_size; // Stack size
const u32 stack_addr; // Stack address

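
What the bf_t union above encodes, written out as plain shifts and masks: the priority occupies the low 13 bits and the enqueue order sits above it, so the scheduler can snapshot both fields with a single atomic 64-bit load. A minimal sketch (pack/unpack are hypothetical helpers; 0x1fff assumes the 13-bit field):

    #include <cstdint>

    constexpr std::uint64_t pack(std::uint64_t prio, std::uint64_t order)
    {
        return (prio & 0x1fff) | (order << 13);
    }

    constexpr std::uint64_t unpack_prio(std::uint64_t all) { return all & 0x1fff; }
    constexpr std::uint64_t unpack_order(std::uint64_t all) { return all >> 13; }

    // Both fields round-trip independently.
    static_assert(unpack_prio(pack(3071, 42)) == 3071);
    static_assert(unpack_order(pack(3071, 42)) == 42);
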
4 changes: 2 additions & 2 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -3293,8 +3293,8 @@ void PPUTranslator::LFSUX(ppu_opcode_t op)
void PPUTranslator::SYNC(ppu_opcode_t op)
{
// sync: Full seq cst barrier
// lwsync: Acq/Release barrier
m_ir->CreateFence(op.l10 ? AtomicOrdering::AcquireRelease : AtomicOrdering::SequentiallyConsistent);
// lwsync: Acq/Release barrier (but not really it seems from observing libsre.sprx)
m_ir->CreateFence(op.l10 && false ? AtomicOrdering::AcquireRelease : AtomicOrdering::SequentiallyConsistent);
}

void PPUTranslator::LFDX(ppu_opcode_t op)
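
In std::atomic terms, the SYNC change amounts to the following: lwsync previously lowered to an acquire/release fence, but both instructions now emit a sequentially consistent fence, since the behavior observed in libsre.sprx suggests acq_rel is too weak here. A sketch, with is_lwsync standing in for op.l10:

    #include <atomic>

    inline void ppu_sync_fence(bool is_lwsync)
    {
        // Before: is_lwsync ? acq_rel : seq_cst. After: always seq_cst.
        (void)is_lwsync;
        std::atomic_thread_fence(std::memory_order_seq_cst);
    }
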
14 changes: 8 additions & 6 deletions rpcs3/Emu/Cell/SPUThread.cpp
@@ -5335,7 +5335,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)

case SPU_WrEventMask:
{
get_events(value);
get_events(value | static_cast<u32>(ch_events.load().mask));

if (ch_events.atomic_op([&](ch_events_t& events)
{
@@ -5347,7 +5347,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
return true;
}

return false;
return !!events.count;
}))
{
// Check interrupts in case count is 1
@@ -5363,7 +5363,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
case SPU_WrEventAck:
{
// "Collect" events before final acknowledgment
get_events(value);
get_events(value | static_cast<u32>(ch_events.load().mask));

bool freeze_dec = false;

@@ -5379,7 +5379,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
return true;
}

return false;
return !!events.count;
});

if (!is_dec_frozen && freeze_dec)
@@ -6157,11 +6157,13 @@ spu_thread::thread_name_t::operator std::string() const
return full_name;
}

spu_thread::priority_t::operator s32() const
spu_thread::spu_prio_t spu_thread::priority_t::load() const
{
if (_this->get_type() != spu_type::threaded || !_this->group->has_scheduler_context)
{
return s32{smax};
spu_thread::spu_prio_t prio{};
prio.prio = smax;
return prio;
}

return _this->group->prio;
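
A simplified sketch of the SPU_WrEventMask change above: the update runs inside ch_events.atomic_op, and the fallthrough result is now !!events.count instead of false, so a waiter is still woken when an enabled event is already pending. The struct and wake predicate below are paraphrased stand-ins; the elided parts of the real body also handle the decrementer and interrupts:

    #include <cstdint>

    struct ch_events_t
    {
        std::uint32_t events; // raised event bits
        std::uint32_t mask;   // enabled event bits
        bool count;           // channel holds a readable value

        // Returns whether the caller should notify a waiter.
        bool set_mask(std::uint32_t value)
        {
            mask = value;

            if (!count && (events & mask))
            {
                count = true; // a newly enabled bit became visible
                return true;
            }

            return count;     // was `return false`: pending events now wake too
        }
    };
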
17 changes: 15 additions & 2 deletions rpcs3/Emu/Cell/SPUThread.h
@@ -887,12 +887,25 @@ class spu_thread : public cpu_thread
operator std::string() const;
} thread_name{ this };

union spu_prio_t
{
u64 all;
bf_t<s64, 0, 9> prio; // Thread priority, low 9 bits
bf_t<s64, 9, 55> order; // Thread enqueue order (TODO), upper 55 bits
};

// For lv2_obj::schedule<spu_thread>
const struct priority_t
struct priority_t
{
const spu_thread* _this;

operator s32() const;
spu_prio_t load() const;

template <typename Func>
auto atomic_op(Func&& func)
{
return static_cast<std::conditional_t<std::is_void_v<Func>, Func, decltype(_this->group)>>(_this->group)->prio.atomic_op(std::move(func));
}
} prio{ this };
};

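Hypothetical usage of the new priority_t facade, assuming a threaded SPU whose group has a scheduler context (otherwise load() returns the smax priority, as SPUThread.cpp shows above); this snippet is illustrative and not taken from the PR:

    spu_thread::spu_prio_t p = spu.prio.load(); // one consistent snapshot
    s32 level = static_cast<s32>(p.prio);       // priority field only

    spu.prio.atomic_op([](spu_thread::spu_prio_t& v)
    {
        v.order = v.order + 1;                  // bump FIFO enqueue order
    });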