Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SPU LLVM Improvements #7944

Merged
merged 6 commits into from
Apr 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
39 changes: 38 additions & 1 deletion Utilities/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,44 @@ bool cfg::try_to_int64(s64* out, const std::string& value, s64 min, s64 max)

if (result < min || result > max)
{
if (out) cfg_log.error("cfg::try_to_int('%s'): out of bounds (%lld..%lld)", value, min, max);
if (out) cfg_log.error("cfg::try_to_int('%s'): out of bounds (%d..%d)", value, min, max);
return false;
}

if (out) *out = result;
return true;
}

// Format an unsigned range as a two-element list: decimal text of `min` first, then of `max`.
std::vector<std::string> cfg::make_uint_range(u64 min, u64 max)
{
	std::vector<std::string> bounds;
	bounds.push_back(std::to_string(min));
	bounds.push_back(std::to_string(max));
	return bounds;
}

bool cfg::try_to_uint64(u64* out, const std::string& value, u64 min, u64 max)
{
u64 result;
const char* start = &value.front();
const char* end = &value.back() + 1;
int base = 10;

if (start[0] == '0' && (start[1] == 'x' || start[1] == 'X'))
{
// Limited hex support
base = 16;
start += 2;
}

const auto ret = std::from_chars(start, end, result, base);

if (ret.ec != std::errc() || ret.ptr != end)
{
if (out) cfg_log.error("cfg::try_to_int('%s'): invalid integer", value);
return false;
}

if (result < min || result > max)
{
if (out) cfg_log.error("cfg::try_to_int('%s'): out of bounds (%u..%u)", value, min, max);
return false;
}

Expand Down
83 changes: 82 additions & 1 deletion Utilities/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,26 @@ namespace cfg
// Convert string to signed integer
bool try_to_int64(s64* out, const std::string& value, s64 min, s64 max);

// Format min and max unsigned values
std::vector<std::string> make_uint_range(u64 min, u64 max);

// Convert string to unsigned integer
bool try_to_uint64(u64* out, const std::string& value, u64 min, u64 max);

// Internal hack
bool try_to_enum_value(u64* out, decltype(&fmt_class_string<int>::format) func, const std::string&);

// Internal hack
std::vector<std::string> try_to_enum_list(decltype(&fmt_class_string<int>::format) func);

// Config tree entry type.
enum class type : uint
enum class type : unsigned
{
node = 0, // cfg::node type
_bool, // cfg::_bool type
_enum, // cfg::_enum type
_int, // cfg::_int type
uint, // cfg::uint type
string, // cfg::string type
set, // cfg::set_entry type
log,
Expand Down Expand Up @@ -302,6 +309,80 @@ namespace cfg
// Alias for 64 bit int
using int64 = _int<INT64_MIN, INT64_MAX>;

// Unsigned integer config entry whose accepted values are limited to the
// compile-time range [Min, Max] when parsed from a string.
template <u64 Min, u64 Max>
class uint final : public _base
{
	static_assert(Min < Max, "Invalid cfg::uint range");

	// Storage type: u32 whenever Max fits into 32 bits, u64 otherwise
	using int_type = std::conditional_t<Max <= UINT32_MAX, u32, u64>;

	// Current value of the entry
	atomic_t<int_type> m_value;

public:
	// Default value, restored by from_default()
	int_type def;

	// Range bounds, made available to users of the entry
	static const u64 max = Max;
	static const u64 min = Min;

	// Registers the entry under `owner` with the given name; the current value starts out equal to `def`.
	uint(node* owner, const std::string& name, int_type def = std::max<int_type>(Min, 0), bool dynamic = false)
		: _base(type::uint, owner, name, dynamic)
		, m_value(def)
		, def(def)
	{
	}

	// Read the current value
	int_type get() const
	{
		return m_value;
	}

	// Implicit read access, same result as get()
	operator int_type() const
	{
		return get();
	}

	// Reset the current value to the stored default
	void from_default() override
	{
		m_value = def;
	}

	// Current value as text
	std::string to_string() const override
	{
		return std::to_string(m_value);
	}

	// Parse `value` via try_to_uint64 with the [Min, Max] bounds.
	// Returns false and leaves the current value untouched when parsing fails.
	bool from_string(const std::string& value, bool /*dynamic*/ = false) override
	{
		u64 parsed;

		if (!try_to_uint64(&parsed, value, Min, Max))
		{
			return false;
		}

		m_value = static_cast<int_type>(parsed);
		return true;
	}

	// Store `value` directly; no range check is applied here, the value is only cast to the storage type.
	void set(const u64& value)
	{
		const auto narrowed = static_cast<int_type>(value);
		m_value = narrowed;
	}

	// Textual form of the allowed range, as produced by make_uint_range
	std::vector<std::string> to_list() const override
	{
		return make_uint_range(Min, Max);
	}
};

// Alias for 32 bit uint
using uint32 = uint<0, UINT32_MAX>;

// Alias for 64 bit uint
using uint64 = uint<0, UINT64_MAX>;

// Simple string entry with mutex
class string final : public _base
{
Expand Down
11 changes: 11 additions & 0 deletions rpcs3/Emu/CPU/CPUTranslator.h
Original file line number Diff line number Diff line change
Expand Up @@ -2842,6 +2842,17 @@ struct fmt_unveil<llvm::TypeSize, void>
}
};

#ifndef _MSC_VER
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif

// LLVM type used for __m128i: a vector of sixteen 8-bit integers.
template <>
inline llvm::Type* cpu_translator::get_type<__m128i>()
{
	llvm::Type* const byte_type = llvm::Type::getInt8Ty(m_context);
	return llvm::VectorType::get(byte_type, 16);
}

#ifndef _MSC_VER
#pragma GCC diagnostic pop
#endif
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/SPUDisAsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ class SPUDisAsm final : public PPCDisAsm
}
void DisAsm(std::string op, field_de_t de, const char* a1, const char* a2)
{
Write(fmt::format("%s %s", FixOp(op.append(BrIndirectSuffix(de))), a1, a2));
Write(fmt::format("%s %s,%s", FixOp(op.append(BrIndirectSuffix(de))), a1, a2));
}

public:
Expand Down
77 changes: 69 additions & 8 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,29 @@ void spu_cache::initialize()
const u32 start = func.lower_bound;
const u32 size0 = ::size32(func.data);

be_t<u64> hash_start;
{
sha1_context ctx;
u8 output[20];

sha1_starts(&ctx);
sha1_update(&ctx, reinterpret_cast<const u8*>(func.data.data()), func.data.size() * 4);
sha1_finish(&ctx, output);
std::memcpy(&hash_start, output, sizeof(hash_start));
}

// Check hash against allowed bounds
const bool inverse_bounds = g_cfg.core.spu_llvm_lower_bound > g_cfg.core.spu_llvm_upper_bound;

if ((!inverse_bounds && (hash_start < g_cfg.core.spu_llvm_lower_bound || hash_start > g_cfg.core.spu_llvm_upper_bound)) ||
(inverse_bounds && (hash_start < g_cfg.core.spu_llvm_lower_bound && hash_start > g_cfg.core.spu_llvm_upper_bound)))
{
spu_log.error("[Debug] Skipped function %s", fmt::base57(hash_start));
g_progr_pdone++;
result++;
continue;
}

// Initialize LS with function data only
for (u32 i = 0, pos = start; i < size0; i++, pos += 4)
{
Expand Down Expand Up @@ -3229,6 +3252,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Patchpoint unique id
u32 m_pp_id = 0;

// Next opcode
u32 m_next_op = 0;

// Current function (chunk)
llvm::Function* m_function;

Expand Down Expand Up @@ -4602,6 +4628,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
break;
}

// Set variable for set_link()
if (m_pos + 4 >= end)
m_next_op = 0;
else
m_next_op = func.data[(m_pos - start) / 4 + 1];

// Execute recompiler function (TODO)
(this->*decode(op))({op});
}
Expand Down Expand Up @@ -6462,10 +6494,25 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b));
}

// Byte-wise rotation of `a`, selected by the low four bits of `b`.
// The input is duplicated into a 32-byte buffer and an unaligned 16-byte load
// is taken at byte offset 16 - (b & 0xf), so the window wraps around `a`.
static __m128i exec_rotqby(__m128i a, u8 b)
{
	alignas(32) const __m128i doubled[2]{a, a};

	const u8* const bytes = reinterpret_cast<const u8*>(doubled);
	const auto offset = 16 - (b & 0xf);

	return _mm_loadu_si128(reinterpret_cast<const __m128i*>(bytes + offset));
}

void ROTQBY(spu_opcode_t op)
{
const auto a = get_vr<u8[16]>(op.ra);
const auto b = get_vr<u8[16]>(op.rb);

if (!m_use_ssse3)
{
value_t<u8[16]> r;
r.value = call("spu_rotqby", &exec_rotqby, a.value, eval(extract(b, 12)).value);
set_vr(op.rt, r);
return;
}

const auto sc = build<u8[16]>(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const auto sh = eval((sc - zshuffle(b, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12)) & 0xf);
set_vr(op.rt, pshufb(a, sh));
Expand Down Expand Up @@ -7649,13 +7696,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
void make_store_ls(value_t<u64> addr, value_t<u8[16]> data)
{
const auto bswapped = zshuffle(data, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()), true);
}

auto make_load_ls(value_t<u64> addr)
{
value_t<u8[16]> data;
data.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()));
data.value = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u8(*)[16]>()), true);
return zshuffle(data, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
}

Expand Down Expand Up @@ -7891,23 +7938,27 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}

m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
const auto type = m_finfo->chunk->getFunctionType()->getPointerTo()->getPointerTo();

if (ret && g_cfg.core.spu_block_size >= spu_block_size_type::mega)
{
// Compare address stored in stack mirror with addr
const auto stack0 = eval(zext<u64>(sp) + ::offset32(&spu_thread::stack_mirror));
const auto stack1 = eval(stack0 + 8);
const auto _ret = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), type));
const auto _ret = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), get_type<u64*>()));
const auto link = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack1.value), get_type<u64*>()));
const auto fail = llvm::BasicBlock::Create(m_context, "", m_function);
const auto done = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpEQ(addr.value, m_ir->CreateTrunc(link, get_type<u32>())), done, fail, m_md_likely);
const auto next = llvm::BasicBlock::Create(m_context, "", m_function);
m_ir->CreateCondBr(m_ir->CreateICmpEQ(addr.value, m_ir->CreateTrunc(link, get_type<u32>())), next, fail, m_md_likely);
m_ir->SetInsertPoint(next);
const auto cmp2 = m_ir->CreateLoad(m_ir->CreateBitCast(m_ir->CreateGEP(m_lsptr, addr.value), get_type<u32*>()));
m_ir->CreateCondBr(m_ir->CreateICmpEQ(cmp2, m_ir->CreateTrunc(_ret, get_type<u32>())), done, fail, m_md_likely);
m_ir->SetInsertPoint(done);

// Clear stack mirror and return by tail call to the provided return address
m_ir->CreateStore(splat<u64[2]>(-1).eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), get_type<u64(*)[2]>()));
tail_chunk(_ret, m_ir->CreateTrunc(m_ir->CreateLShr(link, 32), get_type<u32>()));
const auto targ = m_ir->CreateAdd(m_ir->CreateLShr(_ret, 32), m_ir->getInt64(reinterpret_cast<u64>(jit_runtime::alloc(0, 0))));
tail_chunk(m_ir->CreateIntToPtr(targ, m_finfo->chunk->getFunctionType()->getPointerTo()), m_ir->CreateTrunc(m_ir->CreateLShr(link, 32), get_type<u32>()));
m_ir->SetInsertPoint(fail);
}

Expand Down Expand Up @@ -8271,8 +8322,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto pfunc = add_function(m_pos + 4);
const auto stack0 = eval(zext<u64>(extract(get_reg_fixed(1), 3) & 0x3fff0) + ::offset32(&spu_thread::stack_mirror));
const auto stack1 = eval(stack0 + 8);
const auto rel_ptr = m_ir->CreateSub(m_ir->CreatePtrToInt(pfunc->chunk, get_type<u64>()), m_ir->getInt64(reinterpret_cast<u64>(jit_runtime::alloc(0, 0))));
const auto ptr_plus_op = m_ir->CreateOr(m_ir->CreateShl(rel_ptr, 32), m_ir->getInt64(m_next_op));
const auto base_plus_pc = m_ir->CreateOr(m_ir->CreateShl(m_ir->CreateZExt(m_base_pc, get_type<u64>()), 32), m_ir->getInt64(m_pos + 4));
m_ir->CreateStore(pfunc->chunk, m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), pfunc->chunk->getType()->getPointerTo()));
m_ir->CreateStore(ptr_plus_op, m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack0.value), get_type<u64*>()));
m_ir->CreateStore(base_plus_pc, m_ir->CreateBitCast(m_ir->CreateGEP(m_thread, stack1.value), get_type<u64*>()));
}
}
Expand Down Expand Up @@ -8833,7 +8886,15 @@ struct spu_fast : public spu_recompiler_base
// Install pointer carefully
const bool added = !add_loc->compiled && add_loc->compiled.compare_and_swap_test(nullptr, fn);

if (added)
// Check hash against allowed bounds
const bool inverse_bounds = g_cfg.core.spu_llvm_lower_bound > g_cfg.core.spu_llvm_upper_bound;

if ((!inverse_bounds && (m_hash_start < g_cfg.core.spu_llvm_lower_bound || m_hash_start > g_cfg.core.spu_llvm_upper_bound)) ||
(inverse_bounds && (m_hash_start < g_cfg.core.spu_llvm_lower_bound && m_hash_start > g_cfg.core.spu_llvm_upper_bound)))
{
spu_log.error("[Debug] Skipped function %s", fmt::base57(be_t<u64>{m_hash_start}));
}
else if (added)
{
// Send work to LLVM compiler thread
g_fxo->get<spu_llvm_thread>()->registered.push(m_hash_start, add_loc);
Expand Down
16 changes: 9 additions & 7 deletions rpcs3/Emu/system_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ struct cfg_root : cfg::node
cfg::_bool hook_functions{ this, "Hook static functions" };
cfg::set_entry load_libraries{ this, "Load libraries" };
cfg::_bool hle_lwmutex{ this, "HLE lwmutex" }; // Force alternative lwmutex/lwcond implementation
cfg::uint64 spu_llvm_lower_bound{ this, "SPU LLVM Lower Bound" };
cfg::uint64 spu_llvm_upper_bound{ this, "SPU LLVM Upper Bound", 0xffff'ffff'ffff'ffff };

cfg::_int<10, 3000> clocks_scale{ this, "Clocks scale", 100, true }; // Changing this from 100 (percentage) may affect game speed in unexpected ways
cfg::_enum<sleep_timers_accuracy_level> sleep_timers_accuracy{ this, "Sleep Timers Accuracy",
Expand All @@ -75,12 +77,12 @@ struct cfg_root : cfg::node
cfg::string dev_usb000{ this, "/dev_usb000/", "$(EmulatorDir)dev_usb000/" };
cfg::string dev_bdvd{ this, "/dev_bdvd/" }; // Not mounted
cfg::string app_home{ this, "/app_home/" }; // Not mounted

std::string get_dev_flash() const
{
return get(dev_flash, "dev_flash/");
}

cfg::_bool host_root{ this, "Enable /host_root/" };
cfg::_bool init_dirs{ this, "Initialize Directories", true };

Expand Down Expand Up @@ -135,7 +137,7 @@ struct cfg_root : cfg::node
cfg::_int<0, 30000000> driver_recovery_timeout{ this, "Driver Recovery Timeout", 1000000, true };
cfg::_int<0, 16667> driver_wakeup_delay{ this, "Driver Wake-Up Delay", 1, true };
cfg::_int<1, 1800> vblank_rate{ this, "Vblank Rate", 60, true }; // Changing this from 60 may affect game speed in unexpected ways
cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console
cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console

struct node_vk : cfg::node
{
Expand Down Expand Up @@ -252,7 +254,7 @@ struct cfg_root : cfg::node
cfg::_enum<np_psn_status> psn_status{this, "PSN status", np_psn_status::disabled};
cfg::string psn_npid{this, "NPID", ""};
} net{this};

struct node_misc : cfg::node
{
node_misc(cfg::node* _this) : cfg::node(_this, "Miscellaneous") {}
Expand All @@ -267,11 +269,11 @@ struct cfg_root : cfg::node
cfg::string gdb_server{ this, "GDB Server", "127.0.0.1:2345" };
cfg::_bool silence_all_logs{ this, "Silence All Logs", false, true };
cfg::string title_format{ this, "Window Title Format", "FPS: %F | %R | %V | %T [%t]", true };

} misc{ this };

cfg::log_entry log{ this, "Log" };

std::string name;
};

Expand Down