RPCS3 · Nekotekina · May 11, 2019 · May 5, 2019 · May 10, 2019 · May 10, 2019
diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp
@@ -474,7 +474,7 @@ struct MemoryManager : llvm::RTDyldMemoryManager
 		s_unfire.push_front(std::make_pair(addr, size));
 #endif
 
-		return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size);
+		return RTDyldMemoryManager::registerEHFramesInProcess(addr, size);
 	}
 
 	void deregisterEHFrames() override
@@ -508,6 +508,10 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager
 
 	void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override
 	{
+#ifndef _WIN32 // Nothing is registered when _WIN32 is defined
+		RTDyldMemoryManager::registerEHFramesInProcess(addr, size); // load_addr is not used here
+		s_unfire.push_front(std::make_pair(addr, size)); // Remember the range (presumably for later deregistration - confirm)
+#endif
 	}
 
 	void deregisterEHFrames() override
@@ -770,25 +774,6 @@ jit_compiler::~jit_compiler()
 {
 }
 
-bool jit_compiler::has_ssse3() const
-{
-	if (m_cpu == "generic" ||
-		m_cpu == "k8" ||
-		m_cpu == "opteron" ||
-		m_cpu == "athlon64" ||
-		m_cpu == "athlon-fx" ||
-		m_cpu == "k8-sse3" ||
-		m_cpu == "opteron-sse3" ||
-		m_cpu == "athlon64-sse3" ||
-		m_cpu == "amdfam10" ||
-		m_cpu == "barcelona")
-	{
-		return false;
-	}
-
-	return true;
-}
-
 void jit_compiler::add(std::unique_ptr<llvm::Module> module, const std::string& path)
 {
 	ObjectCache cache{path};

diff --git a/Utilities/JIT.h b/Utilities/JIT.h
@@ -142,9 +142,6 @@ class jit_compiler final
 		return *m_engine;
 	}
 
-	// Test SSSE3 feature
-	bool has_ssse3() const;
-
 	// Add module (path to obj cache dir)
 	void add(std::unique_ptr<llvm::Module> module, const std::string& path);
 

diff --git a/Utilities/types.h b/Utilities/types.h
@@ -10,6 +10,7 @@
 
 #include <cstdint>
 #include <cstddef>
+#include <cstring>
 #include <type_traits>
 #include <utility>
 #include <chrono>
@@ -365,6 +366,9 @@ struct alignas(16) s128
 CHECK_SIZE_ALIGN(u128, 16, 16);
 CHECK_SIZE_ALIGN(s128, 16, 16);
 
+using f32 = float;
+using f64 = double;
+
 union alignas(2) f16
 {
 	u16 _u16;
@@ -375,22 +379,28 @@ union alignas(2) f16
 		_u16 = raw;
 	}
 
-	explicit operator float() const
+	explicit operator f32() const
 	{
 		// See http://stackoverflow.com/a/26779139
 		// The conversion doesn't handle NaN/Inf
 		u32 raw = ((_u16 & 0x8000) << 16) |             // Sign (just moved)
 		          (((_u16 & 0x7c00) + 0x1C000) << 13) | // Exponent ( exp - 15 + 127)
 		          ((_u16 & 0x03FF) << 13);              // Mantissa
-		return (float&)raw;
+
+		// Reinterpret the assembled bits as an f32 by copying them. Reading a
+		// union member other than the one last written is undefined behaviour
+		// in ISO C++ (it only works as a compiler extension); memcpy is always
+		// well-defined for this and needs no aliasing cast.
+		static_assert(sizeof(f32) == sizeof(u32), "f32 must be exactly 32 bits wide");
+
+		f32 res;
+		std::memcpy(&res, &raw, sizeof(res));
+		return res;
 	}
 };
 
 CHECK_SIZE_ALIGN(f16, 2, 2);
 
-using f32 = float;
-using f64 = double;
-
 template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>>
 constexpr T align(const T& value, ullong align)
 {
@@ -400,12 +410,21 @@ constexpr T align(const T& value, ullong align)
 template <typename T, typename T2>
 inline u32 offset32(T T2::*const mptr)
 {
+	union
+	{
+		char data[sizeof(std::size_t)];
+		std::size_t data_;
+		u32 data32;
+	};
+
 #ifdef _MSC_VER
 	static_assert(sizeof(mptr) == sizeof(u32), "Invalid pointer-to-member size");
-	return reinterpret_cast<const u32&>(mptr);
+	std::memcpy(data, &mptr, sizeof(u32));
+	return data32;
 #elif __GNUG__
 	static_assert(sizeof(mptr) == sizeof(std::size_t), "Invalid pointer-to-member size");
-	return static_cast<u32>(reinterpret_cast<const std::size_t&>(mptr));
+	std::memcpy(data, &mptr, sizeof(std::size_t));
+	return data_;
 #else
 	static_assert(sizeof(mptr) == 0, "Invalid pointer-to-member size");
 #endif

diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp
@@ -9,7 +9,54 @@ cpu_translator::cpu_translator(llvm::Module* module, bool is_be)
 	, m_module(module)
 	, m_is_be(is_be)
 {
+}
+
+void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine)
+{
+	// Bind the LLVM context and the execution engine passed by the caller
+	m_context = context;
+	m_engine = &engine;
+
+	// Target CPU names for which SSSE3 usage is turned off (TODO)
+	static constexpr const char* s_no_ssse3[] =
+	{
+		"generic", "k8", "opteron", "athlon64", "athlon-fx",
+		"k8-sse3", "opteron-sse3", "athlon64-sse3", "amdfam10", "barcelona",
+	};
+
+	const auto target_cpu = m_engine->getTargetMachine()->getTargetCPU();
+
+	// SSSE3 stays enabled unless the CPU name matches an entry of the list
+	m_use_ssse3 = true;
+
+	for (const char* name : s_no_ssse3)
+	{
+		if (target_cpu == name)
+			m_use_ssse3 = false;
+	}
+}
+
+llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type)
+{
+	// Total size of a type in bits: the scalar width, multiplied by the
+	// element count when the type is a vector
+	const auto total_bits = [](llvm::Type* t) -> uint
+	{
+		const uint scalar = t->getScalarSizeInBits();
+		return t->isVectorTy() ? scalar * t->getVectorNumElements() : scalar;
+	};
+
+	const uint dst_bits = total_bits(type);
+	const uint src_bits = total_bits(val->getType());
+
+	if (dst_bits != src_bits)
+		fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)", dst_bits, src_bits);
+
+	// Constant operands are folded on the spot instead of going through the IR builder
+	if (const auto constant = llvm::dyn_cast<llvm::Constant>(val))
+		return verify(HERE, llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, constant, type, m_module->getDataLayout()));
 
+	return m_ir->CreateBitCast(val, type);
 }
 
 template <>

diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h
@@ -9,6 +9,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #ifdef _MSC_VER
 #pragma warning(pop)
@@ -19,6 +20,8 @@
 #include "../Utilities/StrFmt.h"
 #include "../Utilities/BEType.h"
 #include "../Utilities/BitField.h"
+#include "../Utilities/Log.h"
+#include "../Utilities/JIT.h"
 
 #include <unordered_map>
 #include <map>
@@ -47,6 +50,7 @@ struct llvm_value_t
 	static constexpr bool is_sint    = false;
 	static constexpr bool is_uint    = false;
 	static constexpr bool is_float   = false;
+	static constexpr uint is_array   = false;
 	static constexpr uint is_vector  = false;
 	static constexpr uint is_pointer = false;
 
@@ -314,6 +318,7 @@ struct llvm_value_t<T*> : llvm_value_t<T>
 	static constexpr bool is_sint    = false;
 	static constexpr bool is_uint    = false;
 	static constexpr bool is_float   = false;
+	static constexpr uint is_array   = false;
 	static constexpr uint is_vector  = false;
 	static constexpr uint is_pointer = llvm_value_t<T>::is_pointer + 1;
 
@@ -333,6 +338,7 @@ struct llvm_value_t<T[N]> : llvm_value_t<T>
 	using base = llvm_value_t<T>;
 	using base::base;
 
+	static constexpr uint is_array   = 0;
 	static constexpr uint is_vector  = N;
 	static constexpr uint is_pointer = 0;
 
@@ -342,6 +348,48 @@ struct llvm_value_t<T[N]> : llvm_value_t<T>
 	}
 };
 
+template <typename T, uint N>
+struct llvm_value_t<T[0][N]> : llvm_value_t<T> // Array of N elements, each of the type described by llvm_value_t<T>
+{
+	using type = T[0][N];
+	using base = llvm_value_t<T>;
+	using base::base;
+
+	static constexpr bool is_int     = false;
+	static constexpr bool is_sint    = false;
+	static constexpr bool is_uint    = false;
+	static constexpr bool is_float   = false;
+	static constexpr uint is_array   = N; // Number of array elements
+	static constexpr uint is_vector  = false;
+	static constexpr uint is_pointer = false;
+
+	static llvm::Type* get_type(llvm::LLVMContext& context)
+	{
+		return llvm::ArrayType::get(llvm_value_t<T>::get_type(context), N); // LLVM array type: N x element type
+	}
+};
+
+template <typename T, uint V, uint N>
+struct llvm_value_t<T[V][N]> : llvm_value_t<T[V]> // Array of N elements, each of the type described by llvm_value_t<T[V]>
+{
+	using type = T[V][N];
+	using base = llvm_value_t<T[V]>;
+	using base::base;
+
+	static constexpr bool is_int     = false;
+	static constexpr bool is_sint    = false;
+	static constexpr bool is_uint    = false;
+	static constexpr bool is_float   = false;
+	static constexpr uint is_array   = N; // Number of array elements
+	static constexpr uint is_vector  = false;
+	static constexpr uint is_pointer = false;
+
+	static llvm::Type* get_type(llvm::LLVMContext& context)
+	{
+		return llvm::ArrayType::get(llvm_value_t<T[V]>::get_type(context), N); // LLVM array type: N x element type
+	}
+};
+
 template <typename T>
 using llvm_expr_t = std::decay_t<T>;
 
@@ -2368,6 +2416,9 @@ class cpu_translator
 	// Module to which all generated code is output to
 	llvm::Module* m_module;
 
+	// Execution engine from JIT instance
+	llvm::ExecutionEngine* m_engine{};
+
 	// Endianness, affects vector element numbering (TODO)
 	bool m_is_be;
 
@@ -2377,6 +2428,8 @@ class cpu_translator
 	// IR builder
 	llvm::IRBuilder<>* m_ir;
 
+	void initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine);
+
 public:
 	// Convert a C++ type to an LLVM type (TODO: remove)
 	template <typename T>
@@ -2421,6 +2474,26 @@ class cpu_translator
 		return result;
 	}
 
+	// Call external function: provide name and function pointer (the name is mapped to the pointer in m_engine)
+	template <typename RT, typename... FArgs, typename... Args>
+	llvm::CallInst* call(std::string_view lame, RT(*_func)(FArgs...), Args... args)
+	{
+		static_assert(sizeof...(FArgs) == sizeof...(Args), "cpu_translator::call(): unexpected arg number"); // Only the argument count is checked
+		const auto type = llvm::FunctionType::get(get_type<RT>(), {args->getType()...}, false); // Parameter types come from the IR values passed in
+		const auto func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction({lame.data(), lame.size()}, type).getCallee());
+		m_engine->addGlobalMapping({lame.data(), lame.size()}, reinterpret_cast<std::uintptr_t>(_func));
+		return m_ir->CreateCall(func, {args...});
+	}
+
+	// Bitcast val to type with immediate constant folding; throws if the total bit sizes differ
+	llvm::Value* bitcast(llvm::Value* val, llvm::Type* type);
+
+	template <typename T>
+	llvm::Value* bitcast(llvm::Value* val) // Same, with the destination type given as the C++ type T
+	{
+		return bitcast(val, get_type<T>());
+	}
+
 	template <typename T>
 	static llvm_placeholder_t<T> match()
 	{

diff --git a/rpcs3/Emu/Cell/Common.h b/rpcs3/Emu/Cell/Common.h
@@ -12,5 +12,13 @@ enum FPSCR_RN
 // Get the exponent of a float
 inline int fexpf(float x)
 {
-	return ((u32&)x >> 23) & 0xFF;
+	// Returns the raw 8-bit field at bits 23..30 of the float's bit pattern.
+	// The bits are obtained with memcpy: reading a union member other than
+	// the one last written is undefined behaviour in ISO C++, whereas
+	// copying the object representation is always well-defined.
+	static_assert(sizeof(u32) == sizeof(float), "float must be exactly 32 bits wide");
+
+	u32 bits;
+	std::memcpy(&bits, &x, sizeof(bits));
+	return (bits >> 23) & 0xFF;
 }
diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp
@@ -3,6 +3,7 @@
 #include "PPUThread.h"
 #include "PPUInterpreter.h"
 #include "Utilities/asm.h"
+#include "Emu/Cell/Common.h"
 
 #include <cmath>
 
@@ -4677,7 +4678,7 @@ bool ppu_interpreter::MTFSB0(ppu_thread& ppu, ppu_opcode_t op)
 bool ppu_interpreter::MTFSFI(ppu_thread& ppu, ppu_opcode_t op)
 {
 	const u32 bf = op.crfd * 4;
-	if (bf != 4 * 4) 
+	if (bf != 4 * 4)
 	{
 		// Do nothing on non-FPCC field (TODO)
 		LOG_WARNING(PPU, "MTFSFI(%d)", op.crfd);

diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -1711,7 +1711,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
 	module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout());
 
 	// Initialize translator
-	PPUTranslator translator(jit.get_context(), module.get(), module_part, jit.has_ssse3());
+	PPUTranslator translator(jit.get_context(), module.get(), module_part, jit.get_engine());
 
 	// Define some types
 	const auto _void = Type::getVoidTy(jit.get_context());

diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h
@@ -1,6 +1,5 @@
 #pragma once
 
-#include "Common.h"
 #include "../CPU/CPUThread.h"
 #include "../Memory/vm.h"
 #include "Utilities/lockless.h"
@@ -79,7 +78,7 @@ class ppu_thread : public cpu_thread
 				result |= bit;
 			}
 
-			return result;	
+			return result;
 		}
 
 		// Unpack CR bits

diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -11,14 +11,13 @@ using namespace llvm;
 
 const ppu_decoder<PPUTranslator> s_ppu_decoder;
 
-PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, const ppu_module& info, bool ssse3)
+PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, const ppu_module& info, ExecutionEngine& engine)
 	: cpu_translator(module, false)
 	, m_info(info)
 	, m_pure_attr(AttributeList::get(m_context, AttributeList::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadNone}))
 {
 	// Bind context
-	m_context = context;
-	m_use_ssse3 = ssse3;
+	cpu_translator::initialize(context, engine);
 
 	// There is no weak linkage on JIT, so let's create variables with different names for each module part
 	const u32 gsuffix = m_info.name.empty() ? info.funcs[0].addr : info.funcs[0].addr - m_info.segs[0].addr;