Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

SPU MFC: Implement MFC commands execution shuffling #8514

Merged
merged 1 commit into from Sep 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions rpcs3/Emu/CPU/CPUThread.cpp
Expand Up @@ -739,6 +739,12 @@ bool cpu_thread::check_state() noexcept
cpu_counter::add(this);
}

if (state & cpu_flag::pending)
{
// Execute pending work
cpu_work();
}

if (retval)
{
cpu_on_stop();
Expand Down
4 changes: 4 additions & 0 deletions rpcs3/Emu/CPU/CPUThread.h
Expand Up @@ -21,6 +21,7 @@ enum class cpu_flag : u32
ret, // Callback return requested
signal, // Thread received a signal (HLE)
memory, // Thread must unlock memory mutex
pending, // Thread has postponed work

dbg_global_pause, // Emulation paused
dbg_pause, // Thread paused
Expand Down Expand Up @@ -169,6 +170,9 @@ class cpu_thread
// Callback for cpu_flag::suspend
virtual void cpu_sleep() {}

// Callback for cpu_flag::pending
virtual void cpu_work() {}

// Callback for cpu_flag::ret
virtual void cpu_return() {}

Expand Down
5 changes: 4 additions & 1 deletion rpcs3/Emu/Cell/SPUInterpreter.cpp
Expand Up @@ -116,7 +116,10 @@ void spu_interpreter::set_interrupt_status(spu_thread& spu, spu_opcode_t op)
spu.set_interrupt_status(false);
}

spu.check_mfc_interrupts(spu.pc);
if (spu.check_mfc_interrupts(spu.pc) && spu.state & cpu_flag::pending)
{
spu.do_mfc();
}
}


Expand Down
153 changes: 130 additions & 23 deletions rpcs3/Emu/Cell/SPUThread.cpp
Expand Up @@ -1655,6 +1655,18 @@ void spu_thread::cpu_task()
}
}

// Callback for cpu_flag::pending: flush postponed MFC commands when warranted.
// Commands are forced through either when the queue has grown past the
// configured shuffling limit, or when the oldest postponed command has been
// waiting longer than the configured timeout (0 disables the timeout check).
void spu_thread::cpu_work()
{
	// Unary + extracts the integral value from the config entry
	const auto deadline = +g_cfg.core.mfc_transfers_timeout;

	const bool over_limit = mfc_size > g_cfg.core.mfc_transfers_shuffling;

	// Timeout is only consulted when the size limit was not hit
	// (get_system_time() must not be called needlessly)
	if (!over_limit && !(deadline && get_system_time() - mfc_last_timestamp >= deadline))
	{
		return;
	}

	// can_escape=false: do not longjmp out via spu_runtime::g_escape from here
	do_mfc(false);
	check_mfc_interrupts(pc + 4);
}

struct raw_spu_cleanup
{
raw_spu_cleanup() = default;
Expand Down Expand Up @@ -2948,14 +2960,15 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
vm::reservation_notifier(addr).notify_all(-128);
}

void spu_thread::do_mfc(bool /*wait*/)
void spu_thread::do_mfc(bool can_escape)
{
u32 removed = 0;
u32 barrier = 0;
u32 fence = 0;
u16 exec_mask = 0;
bool pending = false;

// Process enqueued commands
static_cast<void>(std::remove_if(mfc_queue + 0, mfc_queue + mfc_size, [&](spu_mfc_cmd& args)
auto process_command = [&](spu_mfc_cmd& args)
{
// Select tag bit in the tag mask or the stall mask
const u32 mask = utils::rol32(1, args.tag);
Expand Down Expand Up @@ -2992,6 +3005,20 @@ void spu_thread::do_mfc(bool /*wait*/)
return false;
}

// If command is not enabled in execution mask, execute it later
if (!(exec_mask & (1u << (&args - mfc_queue))))
{
if (args.cmd & MFC_BARRIER_MASK)
{
barrier |= mask;
}

// Fence is set for any command
fence |= mask;
pending = true;
return false;
}

if (args.cmd & MFC_LIST_MASK)
{
if (!(args.tag & 0x80))
Expand Down Expand Up @@ -3028,31 +3055,63 @@ void spu_thread::do_mfc(bool /*wait*/)

removed++;
return true;
}));
};

mfc_size -= removed;
mfc_barrier = barrier;
mfc_fence = fence;
auto get_exec_mask = [&size = mfc_size]
{
// Get commands' execution mask
// Mask bits are always set when mfc_transfers_shuffling is 0
return static_cast<u16>((0 - (1u << std::min<u32>(g_cfg.core.mfc_transfers_shuffling, size))) | __rdtsc());
};

if (removed && ch_tag_upd)
// Process enqueued commands
while (true)
{
const u32 completed = get_mfc_completed();
removed = 0;
barrier = 0;
fence = 0;

// Shuffle command execution (if enabled); explicit barriers are still obeyed
pending = false;
exec_mask = get_exec_mask();

static_cast<void>(std::remove_if(mfc_queue + 0, mfc_queue + mfc_size, process_command));

if (completed && ch_tag_upd == MFC_TAG_UPDATE_ANY)
mfc_size -= removed;
mfc_barrier = barrier;
mfc_fence = fence;

if (removed && ch_tag_upd)
{
ch_tag_stat.set_value(completed);
ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
const u32 completed = get_mfc_completed();

if (completed && ch_tag_upd == MFC_TAG_UPDATE_ANY)
{
ch_tag_stat.set_value(completed);
ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
}
else if (completed == ch_tag_mask && ch_tag_upd == MFC_TAG_UPDATE_ALL)
{
ch_tag_stat.set_value(completed);
ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
}
}

if (can_escape && check_mfc_interrupts(pc + 4))
{
spu_runtime::g_escape(this);
}
else if (completed == ch_tag_mask && ch_tag_upd == MFC_TAG_UPDATE_ALL)

if (!pending)
{
ch_tag_stat.set_value(completed);
ch_tag_upd = MFC_TAG_UPDATE_IMMEDIATE;
break;
}
}

if (check_mfc_interrupts(pc + 4))
if (state & cpu_flag::pending)
{
spu_runtime::g_escape(this);
// No more pending work
state -= cpu_flag::pending;
}
}

Expand Down Expand Up @@ -3109,6 +3168,15 @@ bool spu_thread::process_mfc_cmd()
// Stall infinitely if MFC queue is full
while (mfc_size >= 16) [[unlikely]]
{
// Reset MFC timestamp in the case of full queue
mfc_last_timestamp = 0;

// Process MFC commands
if (!test_stopped())
{
return false;
}

auto old = state.add_fetch(cpu_flag::wait);

if (is_stopped(old))
Expand Down Expand Up @@ -3382,12 +3450,18 @@ bool spu_thread::process_mfc_cmd()
{
if (do_dma_check(ch_mfc_cmd)) [[likely]]
{
if (ch_mfc_cmd.size)
if (!g_cfg.core.mfc_transfers_shuffling)
{
do_dma_transfer(this, ch_mfc_cmd, ls);
if (ch_mfc_cmd.size)
{
do_dma_transfer(this, ch_mfc_cmd, ls);
}

return true;
}

return true;
if (!state.test_and_set(cpu_flag::pending))
mfc_last_timestamp = get_system_time();
}

mfc_queue[mfc_size++] = ch_mfc_cmd;
Expand Down Expand Up @@ -3429,9 +3503,17 @@ bool spu_thread::process_mfc_cmd()

if (do_dma_check(cmd)) [[likely]]
{
if (!cmd.size || do_list_transfer(cmd)) [[likely]]
if (!g_cfg.core.mfc_transfers_shuffling)
{
return true;
if (!cmd.size || do_list_transfer(cmd)) [[likely]]
{
return true;
}
}
else
{
if (!state.test_and_set(cpu_flag::pending))
mfc_last_timestamp = get_system_time();
}
}

Expand All @@ -3445,6 +3527,7 @@ bool spu_thread::process_mfc_cmd()

if (check_mfc_interrupts(pc + 4))
{
do_mfc(false);
spu_runtime::g_escape(this);
}

Expand Down Expand Up @@ -3714,6 +3797,11 @@ s64 spu_thread::get_ch_value(u32 ch)
state += cpu_flag::wait + cpu_flag::temp;
}

if (state & cpu_flag::pending)
{
do_mfc();
}

for (int i = 0; i < 10 && channel.get_count() == 0; i++)
{
busy_wait();
Expand All @@ -3739,6 +3827,11 @@ s64 spu_thread::get_ch_value(u32 ch)

while (true)
{
if (state & cpu_flag::pending)
{
do_mfc();
}

for (int i = 0; i < 10 && ch_in_mbox.get_count() == 0; i++)
{
busy_wait();
Expand Down Expand Up @@ -3770,13 +3863,17 @@ s64 spu_thread::get_ch_value(u32 ch)

case MFC_RdTagStat:
{
if (state & cpu_flag::pending)
{
do_mfc();
}

if (u32 out; ch_tag_stat.try_read(out))
{
ch_tag_stat.set_value(0, false);
return out;
}

// Will stall infinitely
return read_channel(ch_tag_stat);
}

Expand Down Expand Up @@ -3929,6 +4026,11 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
{
if (get_type() >= spu_type::raw)
{
if (state & cpu_flag::pending)
{
do_mfc();
}

if (ch_out_intr_mbox.get_count())
{
state += cpu_flag::wait;
Expand Down Expand Up @@ -4060,6 +4162,11 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)

case SPU_WrOutMbox:
{
if (state & cpu_flag::pending)
{
do_mfc();
}

if (ch_out_mbox.get_count())
{
state += cpu_flag::wait;
Expand Down
6 changes: 5 additions & 1 deletion rpcs3/Emu/Cell/SPUThread.h
Expand Up @@ -631,6 +631,7 @@ class spu_thread : public cpu_thread
virtual std::string dump_misc() const override;
virtual void cpu_task() override final;
virtual void cpu_return() override;
virtual void cpu_work() override;
virtual ~spu_thread() override;
void cleanup();
void cpu_init();
Expand Down Expand Up @@ -668,6 +669,9 @@ class spu_thread : public cpu_thread
u32 mfc_barrier = -1;
u32 mfc_fence = -1;

// Timestamp of the first postponed command (transfers shuffling related)
u64 mfc_last_timestamp = 0;

// MFC proxy command data
spu_mfc_cmd mfc_prxy_cmd;
shared_mutex mfc_prxy_mtx;
Expand Down Expand Up @@ -787,7 +791,7 @@ class spu_thread : public cpu_thread
bool do_list_transfer(spu_mfc_cmd& args);
void do_putlluc(const spu_mfc_cmd& args);
bool do_putllc(const spu_mfc_cmd& args);
void do_mfc(bool wait = true);
void do_mfc(bool can_escape = true);
u32 get_mfc_completed() const;

bool process_mfc_cmd();
Expand Down
2 changes: 2 additions & 0 deletions rpcs3/Emu/system_config.h
Expand Up @@ -47,6 +47,8 @@ struct cfg_root : cfg::node
cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
cfg::_bool spu_cache{ this, "SPU Cache", true };
cfg::_bool spu_prof{ this, "SPU Profiler", false };
cfg::uint<0, 16> mfc_transfers_shuffling{ this, "MFC Transfers Shuffling Max Commands", 0 };
cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Transfers Timeout", 0, true};
cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", has_rtm() ? tsx_usage::enabled : tsx_usage::disabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
Expand Down