Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SPU Analyser improvements ("Giga") #5923

Merged
merged 3 commits into from May 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 5 additions & 20 deletions Utilities/JIT.cpp
Expand Up @@ -474,7 +474,7 @@ struct MemoryManager : llvm::RTDyldMemoryManager
s_unfire.push_front(std::make_pair(addr, size));
#endif

return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size);
return RTDyldMemoryManager::registerEHFramesInProcess(addr, size);
}

void deregisterEHFrames() override
Expand Down Expand Up @@ -508,6 +508,10 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager

// Register an EH frame range in-process and record (addr, size) in s_unfire.
// Compiled to an empty body on Windows; `load_addr` is not used by this override.
void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override
{
#if !defined(_WIN32)
	// Registration happens before the range is recorded
	RTDyldMemoryManager::registerEHFramesInProcess(addr, size);

	auto frame_range = std::make_pair(addr, size);
	s_unfire.push_front(std::move(frame_range));
#endif
}

void deregisterEHFrames() override
Expand Down Expand Up @@ -770,25 +774,6 @@ jit_compiler::~jit_compiler()
{
}

// Test SSSE3 feature: every CPU name is accepted except the ones listed below,
// which are matched against m_cpu by exact name.
bool jit_compiler::has_ssse3() const
{
	static constexpr const char* s_cpus_without_ssse3[] =
	{
		"generic",
		"k8",
		"opteron",
		"athlon64",
		"athlon-fx",
		"k8-sse3",
		"opteron-sse3",
		"athlon64-sse3",
		"amdfam10",
		"barcelona",
	};

	for (const char* const cpu_name : s_cpus_without_ssse3)
	{
		if (m_cpu == cpu_name)
		{
			return false;
		}
	}

	return true;
}

void jit_compiler::add(std::unique_ptr<llvm::Module> module, const std::string& path)
{
ObjectCache cache{path};
Expand Down
3 changes: 0 additions & 3 deletions Utilities/JIT.h
Expand Up @@ -142,9 +142,6 @@ class jit_compiler final
return *m_engine;
}

// Test SSSE3 feature
bool has_ssse3() const;

// Add module (path to obj cache dir)
void add(std::unique_ptr<llvm::Module> module, const std::string& path);

Expand Down
33 changes: 26 additions & 7 deletions Utilities/types.h
Expand Up @@ -10,6 +10,7 @@

#include <cstdint>
#include <cstddef>
#include <cstring>
#include <type_traits>
#include <utility>
#include <chrono>
Expand Down Expand Up @@ -365,6 +366,9 @@ struct alignas(16) s128
CHECK_SIZE_ALIGN(u128, 16, 16);
CHECK_SIZE_ALIGN(s128, 16, 16);

using f32 = float;
using f64 = double;

union alignas(2) f16
{
u16 _u16;
Expand All @@ -375,22 +379,28 @@ union alignas(2) f16
_u16 = raw;
}

explicit operator float() const
explicit operator f32() const
{
// See http://stackoverflow.com/a/26779139
// The conversion doesn't handle NaN/Inf
u32 raw = ((_u16 & 0x8000) << 16) | // Sign (just moved)
(((_u16 & 0x7c00) + 0x1C000) << 13) | // Exponent ( exp - 15 + 127)
((_u16 & 0x03FF) << 13); // Mantissa
return (float&)raw;

union
{
char data[4];
u32 data32;
f32 res;
};

data32 = raw;
return res;
}
};

CHECK_SIZE_ALIGN(f16, 2, 2);

using f32 = float;
using f64 = double;

template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>>
constexpr T align(const T& value, ullong align)
{
Expand All @@ -400,12 +410,21 @@ constexpr T align(const T& value, ullong align)
template <typename T, typename T2>
inline u32 offset32(T T2::*const mptr)
{
union
{
char data[sizeof(std::size_t)];
std::size_t data_;
u32 data32;
};

#ifdef _MSC_VER
static_assert(sizeof(mptr) == sizeof(u32), "Invalid pointer-to-member size");
return reinterpret_cast<const u32&>(mptr);
std::memcpy(data, &mptr, sizeof(u32));
return data32;
#elif __GNUG__
static_assert(sizeof(mptr) == sizeof(std::size_t), "Invalid pointer-to-member size");
return static_cast<u32>(reinterpret_cast<const std::size_t&>(mptr));
std::memcpy(data, &mptr, sizeof(std::size_t));
return data_;
#else
static_assert(sizeof(mptr) == 0, "Invalid pointer-to-member size");
#endif
Expand Down
47 changes: 47 additions & 0 deletions rpcs3/Emu/CPU/CPUTranslator.cpp
Expand Up @@ -9,7 +9,54 @@ cpu_translator::cpu_translator(llvm::Module* module, bool is_be)
, m_module(module)
, m_is_be(is_be)
{
}

// Bind the translator to an LLVM context and execution engine, then set
// m_use_ssse3 according to the engine's target CPU name.
void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine)
{
	m_context = context;
	m_engine = &engine;

	// Target CPU names for which SSSE3 is disabled
	static constexpr const char* s_cpus_without_ssse3[] =
	{
		"generic",
		"k8",
		"opteron",
		"athlon64",
		"athlon-fx",
		"k8-sse3",
		"opteron-sse3",
		"athlon64-sse3",
		"amdfam10",
		"barcelona",
	};

	const auto target_cpu = m_engine->getTargetMachine()->getTargetCPU();

	// Test SSSE3 feature (TODO)
	bool ssse3_allowed = true;

	for (const char* const cpu_name : s_cpus_without_ssse3)
	{
		if (target_cpu == cpu_name)
		{
			ssse3_allowed = false;
			break;
		}
	}

	m_use_ssse3 = ssse3_allowed;
}

// Bitcast `val` to `type`. Throws if the total bit widths differ.
// Constants are folded immediately instead of emitting an instruction.
llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type)
{
	// Total width in bits: scalar width, multiplied by the element count for vector types
	const auto total_bits = [](llvm::Type* t) -> uint
	{
		uint bits = t->getScalarSizeInBits();

		if (t->isVectorTy())
		{
			bits *= t->getVectorNumElements();
		}

		return bits;
	};

	uint dst_bits = total_bits(type);
	uint src_bits = total_bits(val->getType());

	if (dst_bits != src_bits)
	{
		fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)", dst_bits, src_bits);
	}

	// Constant operand: fold the cast right away
	if (const auto as_constant = llvm::dyn_cast<llvm::Constant>(val))
	{
		return verify(HERE, llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, as_constant, type, m_module->getDataLayout()));
	}

	// Non-constant operand: emit a bitcast instruction through the IR builder
	return m_ir->CreateBitCast(val, type);
}

template <>
Expand Down
73 changes: 73 additions & 0 deletions rpcs3/Emu/CPU/CPUTranslator.h
Expand Up @@ -9,6 +9,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Analysis/ConstantFolding.h"
#ifdef _MSC_VER
#pragma warning(pop)
Expand All @@ -19,6 +20,8 @@
#include "../Utilities/StrFmt.h"
#include "../Utilities/BEType.h"
#include "../Utilities/BitField.h"
#include "../Utilities/Log.h"
#include "../Utilities/JIT.h"

#include <unordered_map>
#include <map>
Expand Down Expand Up @@ -47,6 +50,7 @@ struct llvm_value_t
static constexpr bool is_sint = false;
static constexpr bool is_uint = false;
static constexpr bool is_float = false;
static constexpr uint is_array = false;
static constexpr uint is_vector = false;
static constexpr uint is_pointer = false;

Expand Down Expand Up @@ -314,6 +318,7 @@ struct llvm_value_t<T*> : llvm_value_t<T>
static constexpr bool is_sint = false;
static constexpr bool is_uint = false;
static constexpr bool is_float = false;
static constexpr uint is_array = false;
static constexpr uint is_vector = false;
static constexpr uint is_pointer = llvm_value_t<T>::is_pointer + 1;

Expand All @@ -333,6 +338,7 @@ struct llvm_value_t<T[N]> : llvm_value_t<T>
using base = llvm_value_t<T>;
using base::base;

static constexpr uint is_array = 0;
static constexpr uint is_vector = N;
static constexpr uint is_pointer = 0;

Expand All @@ -342,6 +348,48 @@ struct llvm_value_t<T[N]> : llvm_value_t<T>
}
};

// Specialization for T[0][N]: get_type() yields an LLVM array type of N elements,
// each having the LLVM type of llvm_value_t<T>.
template <typename T, uint N>
struct llvm_value_t<T[0][N]> : llvm_value_t<T>
{
	using base = llvm_value_t<T>;
	using type = T[0][N];
	using base::base;

	// Scalar category flags are all cleared for this specialization
	static constexpr bool is_int = false;
	static constexpr bool is_sint = false;
	static constexpr bool is_uint = false;
	static constexpr bool is_float = false;

	// Only the array element count is non-zero
	static constexpr uint is_pointer = 0;
	static constexpr uint is_vector = 0;
	static constexpr uint is_array = N;

	static llvm::Type* get_type(llvm::LLVMContext& context)
	{
		llvm::Type* const element_type = base::get_type(context);
		return llvm::ArrayType::get(element_type, N);
	}
};

// Specialization for T[V][N]: get_type() yields an LLVM array type of N elements,
// each having the LLVM type of llvm_value_t<T[V]>.
template <typename T, uint V, uint N>
struct llvm_value_t<T[V][N]> : llvm_value_t<T[V]>
{
	using base = llvm_value_t<T[V]>;
	using type = T[V][N];
	using base::base;

	// Scalar category flags are all cleared for this specialization
	static constexpr bool is_int = false;
	static constexpr bool is_sint = false;
	static constexpr bool is_uint = false;
	static constexpr bool is_float = false;

	// Only the array element count is non-zero
	static constexpr uint is_pointer = 0;
	static constexpr uint is_vector = 0;
	static constexpr uint is_array = N;

	static llvm::Type* get_type(llvm::LLVMContext& context)
	{
		llvm::Type* const element_type = base::get_type(context);
		return llvm::ArrayType::get(element_type, N);
	}
};

template <typename T>
using llvm_expr_t = std::decay_t<T>;

Expand Down Expand Up @@ -2368,6 +2416,9 @@ class cpu_translator
// Module to which all generated code is output to
llvm::Module* m_module;

// Execution engine from JIT instance
llvm::ExecutionEngine* m_engine{};

// Endianness, affects vector element numbering (TODO)
bool m_is_be;

Expand All @@ -2377,6 +2428,8 @@ class cpu_translator
// IR builder
llvm::IRBuilder<>* m_ir;

void initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine);

public:
// Convert a C++ type to an LLVM type (TODO: remove)
template <typename T>
Expand Down Expand Up @@ -2421,6 +2474,26 @@ class cpu_translator
return result;
}

// Call external function: provide name and function pointer.
// Declares (or reuses) a function called `lame` in the module, maps that name to the
// address of `_func` in the execution engine, and emits a call with `args`.
// Note: the LLVM function type is built from the types of the passed `args` values;
// `FArgs` is only used to check that the number of arguments matches.
// Returns the emitted call instruction.
template <typename RT, typename... FArgs, typename... Args>
llvm::CallInst* call(std::string_view lame, RT(*_func)(FArgs...), Args... args)
{
	static_assert(sizeof...(FArgs) == sizeof...(Args), "cpu_translator::call(): unexpected arg number");

	// Signature: return type from RT, parameter types from the actual argument values
	const auto type = llvm::FunctionType::get(get_type<RT>(), {args->getType()...}, false);
	const auto func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction({lame.data(), lame.size()}, type).getCallee());

	// Bind the symbol name to the host function address
	m_engine->addGlobalMapping({lame.data(), lame.size()}, reinterpret_cast<std::uintptr_t>(_func));
	return m_ir->CreateCall(func, {args...});
}

// Bitcast with immediate constant folding
llvm::Value* bitcast(llvm::Value* val, llvm::Type* type);

// Bitcast `val` to the LLVM type obtained from get_type<T>()
template <typename T>
llvm::Value* bitcast(llvm::Value* val)
{
	const auto target_type = get_type<T>();
	return bitcast(val, target_type);
}

template <typename T>
static llvm_placeholder_t<T> match()
{
Expand Down
10 changes: 9 additions & 1 deletion rpcs3/Emu/Cell/Common.h
Expand Up @@ -12,5 +12,13 @@ enum FPSCR_RN
// Get the exponent of a float
// Returns bits 23..30 of the 32-bit representation of x, i.e. the raw 8-bit
// exponent field (0..255), without removing any bias.
inline int fexpf(float x)
{
	static_assert(sizeof(float) == sizeof(std::uint32_t), "fexpf(): unexpected float size");

	// Copy the object representation instead of casting the reference:
	// reading a float through a u32 lvalue violates strict aliasing
	std::uint32_t bits = 0;
	std::memcpy(&bits, &x, sizeof(bits));

	return static_cast<int>((bits >> 23) & 0xFF);
}
3 changes: 2 additions & 1 deletion rpcs3/Emu/Cell/PPUInterpreter.cpp
Expand Up @@ -3,6 +3,7 @@
#include "PPUThread.h"
#include "PPUInterpreter.h"
#include "Utilities/asm.h"
#include "Emu/Cell/Common.h"

#include <cmath>

Expand Down Expand Up @@ -4677,7 +4678,7 @@ bool ppu_interpreter::MTFSB0(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::MTFSFI(ppu_thread& ppu, ppu_opcode_t op)
{
const u32 bf = op.crfd * 4;
if (bf != 4 * 4)
if (bf != 4 * 4)
{
// Do nothing on non-FPCC field (TODO)
LOG_WARNING(PPU, "MTFSFI(%d)", op.crfd);
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/PPUThread.cpp
Expand Up @@ -1711,7 +1711,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout());

// Initialize translator
PPUTranslator translator(jit.get_context(), module.get(), module_part, jit.has_ssse3());
PPUTranslator translator(jit.get_context(), module.get(), module_part, jit.get_engine());

// Define some types
const auto _void = Type::getVoidTy(jit.get_context());
Expand Down
3 changes: 1 addition & 2 deletions rpcs3/Emu/Cell/PPUThread.h
@@ -1,6 +1,5 @@
#pragma once

#include "Common.h"
#include "../CPU/CPUThread.h"
#include "../Memory/vm.h"
#include "Utilities/lockless.h"
Expand Down Expand Up @@ -79,7 +78,7 @@ class ppu_thread : public cpu_thread
result |= bit;
}

return result;
return result;
}

// Unpack CR bits
Expand Down
5 changes: 2 additions & 3 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
Expand Up @@ -11,14 +11,13 @@ using namespace llvm;

const ppu_decoder<PPUTranslator> s_ppu_decoder;

PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, const ppu_module& info, bool ssse3)
PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, const ppu_module& info, ExecutionEngine& engine)
: cpu_translator(module, false)
, m_info(info)
, m_pure_attr(AttributeList::get(m_context, AttributeList::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadNone}))
{
// Bind context
m_context = context;
m_use_ssse3 = ssse3;
cpu_translator::initialize(context, engine);

// There is no weak linkage on JIT, so let's create variables with different names for each module part
const u32 gsuffix = m_info.name.empty() ? info.funcs[0].addr : info.funcs[0].addr - m_info.segs[0].addr;
Expand Down