Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finalize constexpr ppu_decoder<> #7850

Merged
merged 3 commits into from Mar 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Utilities/types.h
Expand Up @@ -46,7 +46,7 @@
#define ASSUME(...) do { if (!(__VA_ARGS__)) __builtin_unreachable(); } while (0) // note: the compiler will generate code to evaluate "cond" if the expression is opaque
#endif

#define SAFE_BUFFERS
#define SAFE_BUFFERS __attribute__((no_stack_protector))
#define NEVER_INLINE __attribute__((noinline))
#define FORCE_INLINE __attribute__((always_inline)) inline
#define RESTRICT __restrict__
Expand Down
91 changes: 21 additions & 70 deletions rpcs3/Emu/Cell/PPUInterpreter.cpp
Expand Up @@ -5,6 +5,7 @@
#include "Emu/system_config.h"
#include "PPUThread.h"
#include "Utilities/asm.h"
#include "Utilities/sysinfo.h"
#include "Emu/Cell/Common.h"

#include <cmath>
Expand All @@ -21,6 +22,8 @@
#define SSSE3_FUNC __attribute__((__target__("ssse3")))
#endif

const bool s_use_ssse3 = utils::has_ssse3();

// Duplicate a 32-bit value into both the low and high halves of a 64-bit word
inline u64 dup32(u32 x)
{
	const u64 wide = x;
	return (wide << 32) | wide;
}

// Write values to CR field
Expand Down Expand Up @@ -123,7 +126,7 @@ extern __m128 sse_log2_ps(__m128 A)
return _mm_add_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(x5, x6), x7), x4), _c), _mm_add_ps(_mm_mul_ps(x4, _c), x8));
}

extern __m128i sse_pshufb(__m128i data, __m128i index)
extern SAFE_BUFFERS __m128i sse_pshufb(__m128i data, __m128i index)
{
v128 m = v128::fromV(_mm_and_si128(index, _mm_set1_epi8(0xf)));
v128 a = v128::fromV(data);
Expand All @@ -146,7 +149,7 @@ extern SSSE3_FUNC __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C)
return _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb));
}

extern __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C)
extern SAFE_BUFFERS __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C)
{
__m128i ab[2]{B, A};
v128 index = v128::fromV(_mm_andnot_si128(C, _mm_set1_epi8(0x1f)));
Expand Down Expand Up @@ -1427,15 +1430,11 @@ bool ppu_interpreter::VOR(ppu_thread& ppu, ppu_opcode_t op)
return true;
}

bool ppu_interpreter_precise::VPERM(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.vr[op.vd].vi = sse_altivec_vperm_v0(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi);
return true;
}

bool ppu_interpreter_fast::VPERM(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::VPERM(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.vr[op.vd].vi = sse_altivec_vperm(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi);
ppu.vr[op.vd].vi = s_use_ssse3
? sse_altivec_vperm(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi)
: sse_altivec_vperm_v0(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi);
return true;
}

Expand Down Expand Up @@ -3959,17 +3958,10 @@ bool ppu_interpreter::DIVW(ppu_thread& ppu, ppu_opcode_t op)
return true;
}

bool ppu_interpreter_precise::LVLX(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LVLX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvlx_v0(addr);
return true;
}

bool ppu_interpreter_fast::LVLX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvlx(addr);
ppu.vr[op.vd].vi = s_use_ssse3 ? sse_cellbe_lvlx(addr) : sse_cellbe_lvlx_v0(addr);
return true;
}

Expand Down Expand Up @@ -4030,17 +4022,10 @@ bool ppu_interpreter::SRD(ppu_thread& ppu, ppu_opcode_t op)
return true;
}

bool ppu_interpreter_precise::LVRX(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LVRX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvrx_v0(addr);
return true;
}

bool ppu_interpreter_fast::LVRX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvrx(addr);
ppu.vr[op.vd].vi = s_use_ssse3 ? sse_cellbe_lvrx(addr) : sse_cellbe_lvrx_v0(addr);
return true;
}

Expand Down Expand Up @@ -4105,17 +4090,10 @@ bool ppu_interpreter::LFDUX(ppu_thread& ppu, ppu_opcode_t op)
return true;
}

bool ppu_interpreter_precise::STVLX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs].vi);
return true;
}

bool ppu_interpreter_fast::STVLX(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STVLX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi);
s_use_ssse3 ? sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi) : sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs].vi);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
s_use_ssse3 ? sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi) : sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs].vi);
(s_use_ssse3 ? sse_cellbe_stvlx : sse_cellbe_stvlx_v0)(addr, ppu.vr[op.vs].vi);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider this style here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have had bad experiences with this approach, where only the function pointer is selected like this. I checked the code MSVC generates for the current version, and it performs common subexpression elimination anyway.

return true;
}

Expand Down Expand Up @@ -4160,17 +4138,10 @@ bool ppu_interpreter::STFSX(ppu_thread& ppu, ppu_opcode_t op)
return true;
}

bool ppu_interpreter_precise::STVRX(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STVRX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvrx_v0(addr, ppu.vr[op.vs].vi);
return true;
}

bool ppu_interpreter_fast::STVRX(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvrx(addr, ppu.vr[op.vs].vi);
s_use_ssse3 ? sse_cellbe_stvrx(addr, ppu.vr[op.vs].vi) : sse_cellbe_stvrx_v0(addr, ppu.vr[op.vs].vi);
return true;
}

Expand Down Expand Up @@ -4227,12 +4198,7 @@ bool ppu_interpreter::STFDUX(ppu_thread& ppu, ppu_opcode_t op)
return true;
}

bool ppu_interpreter_precise::LVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
return LVLX(ppu, op);
}

bool ppu_interpreter_fast::LVLXL(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
return LVLX(ppu, op);
}
Expand Down Expand Up @@ -4282,12 +4248,7 @@ bool ppu_interpreter::SRAD(ppu_thread& ppu, ppu_opcode_t op)
return true;
}

bool ppu_interpreter_precise::LVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
return LVRX(ppu, op);
}

bool ppu_interpreter_fast::LVRXL(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::LVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
return LVRX(ppu, op);
}
Expand Down Expand Up @@ -4324,12 +4285,7 @@ bool ppu_interpreter::EIEIO(ppu_thread& ppu, ppu_opcode_t op)
return true;
}

bool ppu_interpreter_precise::STVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
return STVLX(ppu, op);
}

bool ppu_interpreter_fast::STVLXL(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
return STVLX(ppu, op);
}
Expand All @@ -4348,12 +4304,7 @@ bool ppu_interpreter::EXTSH(ppu_thread& ppu, ppu_opcode_t op)
return true;
}

bool ppu_interpreter_precise::STVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
return STVRX(ppu, op);
}

bool ppu_interpreter_fast::STVRXL(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::STVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
return STVRX(ppu, op);
}
Expand Down
22 changes: 6 additions & 16 deletions rpcs3/Emu/Cell/PPUInterpreter.h
Expand Up @@ -76,6 +76,7 @@ struct ppu_interpreter
static bool VNMSUBFP(ppu_thread&, ppu_opcode_t);
static bool VNOR(ppu_thread&, ppu_opcode_t);
static bool VOR(ppu_thread&, ppu_opcode_t);
static bool VPERM(ppu_thread&, ppu_opcode_t);
static bool VPKPX(ppu_thread&, ppu_opcode_t);
static bool VPKUHUM(ppu_thread&, ppu_opcode_t);
static bool VPKUWUM(ppu_thread&, ppu_opcode_t);
Expand Down Expand Up @@ -328,12 +329,6 @@ struct ppu_interpreter
static bool FCTIDZ(ppu_thread&, ppu_opcode_t);
static bool FCFID(ppu_thread&, ppu_opcode_t);

static bool UNK(ppu_thread&, ppu_opcode_t);
};

struct ppu_interpreter_precise final : ppu_interpreter
{
static bool VPERM(ppu_thread&, ppu_opcode_t);
static bool LVLX(ppu_thread&, ppu_opcode_t);
static bool LVLXL(ppu_thread&, ppu_opcode_t);
static bool LVRX(ppu_thread&, ppu_opcode_t);
Expand All @@ -343,6 +338,11 @@ struct ppu_interpreter_precise final : ppu_interpreter
static bool STVRX(ppu_thread&, ppu_opcode_t);
static bool STVRXL(ppu_thread&, ppu_opcode_t);

static bool UNK(ppu_thread&, ppu_opcode_t);
};

struct ppu_interpreter_precise final : ppu_interpreter
{
static bool VPKSHSS(ppu_thread&, ppu_opcode_t);
static bool VPKSHUS(ppu_thread&, ppu_opcode_t);
static bool VPKSWSS(ppu_thread&, ppu_opcode_t);
Expand Down Expand Up @@ -400,16 +400,6 @@ struct ppu_interpreter_precise final : ppu_interpreter

struct ppu_interpreter_fast final : ppu_interpreter
{
static bool VPERM(ppu_thread&, ppu_opcode_t);
static bool LVLX(ppu_thread&, ppu_opcode_t);
static bool LVLXL(ppu_thread&, ppu_opcode_t);
static bool LVRX(ppu_thread&, ppu_opcode_t);
static bool LVRXL(ppu_thread&, ppu_opcode_t);
static bool STVLX(ppu_thread&, ppu_opcode_t);
static bool STVLXL(ppu_thread&, ppu_opcode_t);
static bool STVRX(ppu_thread&, ppu_opcode_t);
static bool STVRXL(ppu_thread&, ppu_opcode_t);

static bool VPKSHSS(ppu_thread&, ppu_opcode_t);
static bool VPKSHUS(ppu_thread&, ppu_opcode_t);
static bool VPKSWSS(ppu_thread&, ppu_opcode_t);
Expand Down
6 changes: 0 additions & 6 deletions rpcs3/Emu/Cell/PPUOpcodes.h
Expand Up @@ -574,12 +574,6 @@ class ppu_decoder
});
}

template <typename F>
ppu_decoder(F&& init) : ppu_decoder()
{
init(m_table);
}

const std::array<T, 0x20000>& get_table() const
{
return m_table;
Expand Down
53 changes: 2 additions & 51 deletions rpcs3/Emu/Cell/PPUThread.cpp
Expand Up @@ -101,57 +101,8 @@ void fmt_class_string<ppu_decoder_type>::format(std::string& out, u64 arg)
});
}

// Table of equivalent interpreter functions: the precise variant holds the SSE2 version and the fast variant holds the SSSE3 version
const std::pair<ppu_inter_func_t, ppu_inter_func_t> s_ppu_dispatch_table[]
{
#define FUNC(x) {&ppu_interpreter_precise::x, &ppu_interpreter_fast::x}
FUNC(VPERM),
FUNC(LVLX),
FUNC(LVLXL),
FUNC(LVRX),
FUNC(LVRXL),
FUNC(STVLX),
FUNC(STVLXL),
FUNC(STVRX),
FUNC(STVRXL),
#undef FUNC
};

static const ppu_decoder<ppu_interpreter_precise> g_ppu_interpreter_precise([](auto& table)
{
if (s_use_ssse3)
{
for (auto& func : table)
{
for (const auto& pair : s_ppu_dispatch_table)
{
if (pair.first == func)
{
func = pair.second;
break;
}
}
}
}
});

static const ppu_decoder<ppu_interpreter_fast> g_ppu_interpreter_fast([](auto& table)
{
if (!s_use_ssse3)
{
for (auto& func : table)
{
for (const auto& pair : s_ppu_dispatch_table)
{
if (pair.second == func)
{
func = pair.first;
break;
}
}
}
}
});
constexpr ppu_decoder<ppu_interpreter_precise> g_ppu_interpreter_precise;
constexpr ppu_decoder<ppu_interpreter_fast> g_ppu_interpreter_fast;

extern void ppu_initialize();
extern void ppu_initialize(const ppu_module& info);
Expand Down
1 change: 1 addition & 0 deletions rpcs3/main.cpp
Expand Up @@ -412,6 +412,7 @@ int main(int argc, char** argv)
parser.addOption(QCommandLineOption(arg_styles, "Lists the available styles."));
parser.addOption(QCommandLineOption(arg_style, "Loads a custom style.", "style", ""));
parser.addOption(QCommandLineOption(arg_stylesheet, "Loads a custom stylesheet.", "path", ""));
parser.addOption(QCommandLineOption(arg_updating, "For internal usage."));
parser.process(app->arguments());

// Don't start up the full rpcs3 gui if we just want the version or help.
Expand Down