Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cellSpurs: Fix HLE workload signalling, taskset fixes #9016

Merged
merged 1 commit into from Oct 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
108 changes: 81 additions & 27 deletions rpcs3/Emu/Cell/Modules/cellSpurs.cpp
Expand Up @@ -3700,15 +3700,37 @@ s32 _spurs::create_task(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id,
// TODO: Verify the ELF header is proper and all its load segments are at address >= 0x3000

u32 tmp_task_id;
for (tmp_task_id = 0; tmp_task_id < CELL_SPURS_MAX_TASK; tmp_task_id++)
{
if (!taskset->enabled.value()._bit[tmp_task_id])
auto addr = taskset.ptr(&CellSpursTaskset::enabled).addr();
auto [res, rtime] = vm::reservation_lock(addr, 16, vm::dma_lockb);

// NOTE: Real firmware processes this using four 32-bit atomic loops,
// but here it is processed within a single 128-bit atomic op
vm::_ref<atomic_be_t<v128>>(addr).fetch_op([&](be_t<v128>& value)
{
auto enabled = taskset->enabled.value();
enabled._bit[tmp_task_id] = true;
taskset->enabled = enabled;
break;
}
auto value0 = value.value();

if (auto pos = std::countl_one(+value0._u64[0]); pos != 64)
{
tmp_task_id = pos;
value0._u64[0] |= (1ull << 63) >> pos;
value = value0;
return true;
}

if (auto pos = std::countl_one(+value0._u64[1]); pos != 64)
{
tmp_task_id = pos + 64;
value0._u64[1] |= (1ull << 63) >> pos;
value = value0;
return true;
}

tmp_task_id = CELL_SPURS_MAX_TASK;
return false;
});

res.release(rtime + 128);
}

if (tmp_task_id >= CELL_SPURS_MAX_TASK)
Expand All @@ -3730,13 +3752,14 @@ s32 _spurs::create_task(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id,

s32 _spurs::task_start(ppu_thread& ppu, vm::ptr<CellSpursTaskset> taskset, u32 taskId)
{
auto pendingReady = taskset->pending_ready.value();
pendingReady._bit[taskId] = true;
taskset->pending_ready = pendingReady;
auto [res, rtime] = vm::reservation_lock(taskset.ptr(&CellSpursTaskset::pending_ready).addr(), 16, vm::dma_lockb);
taskset->pending_ready.values[taskId / 32] |= (1u << 31) >> (taskId % 32);
res.release(rtime + 128);

cellSpursSendWorkloadSignal(ppu, taskset->spurs, taskset->wid);
auto spurs = +taskset->spurs;
ppu_execute<&cellSpursSendWorkloadSignal>(ppu, spurs, +taskset->wid);

if (s32 rc = cellSpursWakeUp(ppu, taskset->spurs))
if (s32 rc = ppu_execute<&cellSpursWakeUp>(ppu, spurs))
{
if (rc + 0u == CELL_SPURS_POLICY_MODULE_ERROR_STAT)
{
Expand Down Expand Up @@ -3782,6 +3805,8 @@ s32 cellSpursCreateTask(ppu_thread& ppu, vm::ptr<CellSpursTaskset> taskset, vm::

s32 _cellSpursSendSignal(ppu_thread& ppu, vm::ptr<CellSpursTaskset> taskset, u32 taskId)
{
cellSpurs.trace("_cellSpursSendSignal(taskset=*0x%x, taskId=0x%x)", taskset, taskId);

if (!taskset)
{
return CELL_SPURS_TASK_ERROR_NULL_POINTER;
Expand All @@ -3797,30 +3822,59 @@ s32 _cellSpursSendSignal(ppu_thread& ppu, vm::ptr<CellSpursTaskset> taskset, u32
return CELL_SPURS_TASK_ERROR_INVAL;
}

be_t<v128> _0(v128::from32(0));
bool disabled = taskset->enabled.value()._bit[taskId];
auto invalid = (taskset->ready & taskset->pending_ready) != _0 || (taskset->running & taskset->waiting) != _0 || disabled ||
((taskset->running | taskset->ready | taskset->pending_ready | taskset->waiting | taskset->signalled) & ~taskset->enabled) != _0;

if (invalid)
int signal;
for (;;)
{
return CELL_SPURS_TASK_ERROR_SRCH;
const u32 addr = taskset.ptr(&CellSpursTaskset::signalled).ptr(&decltype(CellSpursTaskset::signalled)::values, taskId / 32).addr();
u32 signalled = ppu_lwarx(ppu, addr);

const u32 running = taskset->running.values[taskId / 32];
const u32 ready = taskset->ready.values[taskId / 32];
const u32 waiting = taskset->waiting.values[taskId / 32];
const u32 enabled = taskset->enabled.values[taskId / 32];
const u32 pready = taskset->pending_ready.values[taskId / 32];

const u32 mask = (1u << 31) >> (taskId % 32);

if ((running & waiting) || (ready & pready) ||
((signalled | waiting | pready | running | ready) & ~enabled) || !(enabled & mask))
{
// Error conditions:
// 1) Cannot have a waiting bit and running bit set at the same time
// 2) Cannot have a ready bit and pending_ready bit set at the same time
// 3) No state bit may be set for a task that is not enabled
// 4) Specified task must be enabled
signal = -1;
}
else
{
signal = !!(~signalled & waiting & mask);
signalled |= (signal ? mask : 0);
}

if (ppu_stwcx(ppu, addr, signalled))
{
break;
}
}

auto shouldSignal = ((taskset->waiting & ~taskset->signalled) & be_t<v128>(v128::fromBit(taskId))) != _0 ? true : false;
auto signalled = taskset->signalled.value();
signalled._bit[taskId] = true;
taskset->signalled = signalled;
if (shouldSignal)
switch (signal)
{
cellSpursSendWorkloadSignal(ppu, taskset->spurs, taskset->wid);
auto rc = cellSpursWakeUp(ppu, taskset->spurs);
case 0: break;
case 1:
{
auto spurs = +taskset->spurs;

ppu_execute<&cellSpursSendWorkloadSignal>(ppu, spurs, +taskset->wid);
auto rc = ppu_execute<&cellSpursWakeUp>(ppu, spurs);
if (rc + 0u == CELL_SPURS_POLICY_MODULE_ERROR_STAT)
{
return CELL_SPURS_TASK_ERROR_STAT;
}

ASSERT(rc == CELL_OK);
return rc;
}
default: return CELL_SPURS_TASK_ERROR_SRCH;
}

return CELL_OK;
Expand Down
22 changes: 16 additions & 6 deletions rpcs3/Emu/Cell/Modules/cellSpurs.h
Expand Up @@ -811,12 +811,22 @@ struct alignas(128) CellSpursTaskset

CHECK_SIZE(TaskInfo, 48);

be_t<v128> running; // 0x00
be_t<v128> ready; // 0x10
be_t<v128> pending_ready; // 0x20
be_t<v128> enabled; // 0x30
be_t<v128> signalled; // 0x40
be_t<v128> waiting; // 0x50
// 128-bit bitset stored as four atomic 32-bit words.
// Bits are numbered from the most significant bit of values[0]: bit N lives in
// values[N / 32] under the mask (1u << 31) >> (N % 32).
struct atomic_tasks_bitset
{
	atomic_be_t<u32> values[4];

	// Returns the (non-zero) mask of the requested bit if it is set, 0 otherwise.
	// No bounds check is performed: bit must be below 128.
	u32 get_bit(u32 bit) const
	{
		return values[bit / 32] & ((1u << 31) >> (bit % 32));
	}
};

atomic_tasks_bitset running; // 0x00
atomic_tasks_bitset ready; // 0x10
atomic_tasks_bitset pending_ready; // 0x20
atomic_tasks_bitset enabled; // 0x30
atomic_tasks_bitset signalled; // 0x40
atomic_tasks_bitset waiting; // 0x50
vm::bptr<CellSpurs, u64> spurs; // 0x60
be_t<u64> args; // 0x68
u8 enable_clear_ls; // 0x70
Expand Down