Permalink
Browse files

new_dynarec: autodetect ARM integer divide feature at runtime

  • Loading branch information...
Gillou68310
Gillou68310 committed Jan 21, 2015
1 parent a180d01 commit e250b48ccf3f774361442a60f3dbcd8780f4e77b
View
@@ -134,6 +134,7 @@ LOCAL_LDLIBS := -lz
ifeq ($(TARGET_ARCH_ABI), armeabi-v7a)
# Use for ARM7a:
LOCAL_SRC_FILES += $(SRCDIR)/r4300/new_dynarec/arm_cpu_features.cpp
LOCAL_SRC_FILES += $(SRCDIR)/r4300/new_dynarec/linkage_arm.S
LOCAL_CFLAGS += -DDYNAREC
LOCAL_CFLAGS += -DNEW_DYNAREC=3
@@ -142,6 +143,7 @@ ifeq ($(TARGET_ARCH_ABI), armeabi-v7a)
else ifeq ($(TARGET_ARCH_ABI), armeabi)
# Use for pre-ARM7a:
LOCAL_SRC_FILES += $(SRCDIR)/r4300/new_dynarec/arm_cpu_features.cpp
LOCAL_SRC_FILES += $(SRCDIR)/r4300/new_dynarec/linkage_arm.S
LOCAL_CFLAGS += -DARMv5_ONLY
LOCAL_CFLAGS += -DDYNAREC
@@ -0,0 +1,150 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Mupen64plus - arm_cpu_features.cpp *
* Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
* Copyright (C) 2015 Gilles Siberlin *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include <cstdio>
#include <fstream>
#include <sstream>
#include <string>
#include "arm_cpu_features.h"
extern "C" {
#include "api/callbacks.h"
}
// Global table of CPU feature flags; filled in by detect_arm_cpu_features()
// and reported by print_arm_cpu_features().
arm_cpu_features_t arm_cpu_features;
// Path of the text file that every parsing helper in this file opens and scans.
const char procfile[] = "/proc/cpuinfo";
// Tell whether `feature` is listed on a "Features" line of procfile.
//
// Every line containing the "Features\t: " tag is split on single spaces and
// each resulting token is compared for exact equality with `feature`.
//
// Returns 1 on the first match, 0 when nothing matches or when the file
// cannot be opened.
static unsigned char check_arm_cpu_feature(const std::string& feature)
{
    const std::string features_tag = "Features\t: ";

    std::ifstream cpuinfo(procfile);
    if (cpuinfo.fail())
        return 0;

    for (std::string row; std::getline(cpuinfo, row); )
    {
        // Only the feature list lines are of interest.
        if (row.find(features_tag) == std::string::npos)
            continue;

        // Split on ' ' only, so the "Features\t:" prefix stays one token.
        std::stringstream words(row);
        for (std::string word; std::getline(words, word, ' '); )
        {
            if (word == feature)
                return 1;
        }
    }

    return 0;
}
// Read the "CPU implementer" code from procfile.
//
// Only the first line containing the "CPU implementer\t: " tag is examined;
// the value after the tag is expected in the form 0xNN (hexadecimal).
//
// Returns the parsed code, or 0 when the file cannot be opened, no line
// carries the tag, or the value does not parse.
static unsigned char get_arm_cpu_implementer(void)
{
    const std::string marker = "CPU implementer\t: ";
    std::string line;
    std::ifstream file(procfile);

    if (!file)
        return 0;

    while (std::getline(file, line))
    {
        const std::string::size_type pos = line.find(marker);
        if (pos == std::string::npos)
            continue;

        // Parse right behind the marker, wherever in the line it was found
        // (find() does not require it to start at column 0).
        unsigned char implementer = 0;
        if (std::sscanf(line.c_str() + pos + marker.length(), "0x%02hhx", &implementer) != 1)
            return 0;
        return implementer;
    }

    return 0;
}
// Read the "CPU part" number from procfile.
//
// Only the first line containing the "CPU part\t: " tag is examined; the
// value after the tag is expected in the form 0xNNN (hexadecimal).
//
// Returns the parsed part number, or 0 when the file cannot be opened, no
// line carries the tag, or the value does not parse.
static unsigned short get_arm_cpu_part(void)
{
    const std::string marker = "CPU part\t: ";
    std::string line;
    std::ifstream file(procfile);

    if (!file)
        return 0;

    while (std::getline(file, line))
    {
        const std::string::size_type pos = line.find(marker);
        if (pos == std::string::npos)
            continue;

        // Parse right behind the marker, wherever in the line it was found
        // (find() does not require it to start at column 0).
        unsigned short part = 0;
        if (std::sscanf(line.c_str() + pos + marker.length(), "0x%03hx", &part) != 1)
            return 0;
        return part;
    }

    return 0;
}
void detect_arm_cpu_features(void)
{
arm_cpu_features.SWP = check_arm_cpu_feature("swp");
arm_cpu_features.Half = check_arm_cpu_feature("half");
arm_cpu_features.Thumb = check_arm_cpu_feature("thumb");
arm_cpu_features.FastMult = check_arm_cpu_feature("fastmult");
arm_cpu_features.VFP = check_arm_cpu_feature("vfp");
arm_cpu_features.EDSP = check_arm_cpu_feature("edsp");
arm_cpu_features.ThumbEE = check_arm_cpu_feature("thumbee");
arm_cpu_features.NEON = check_arm_cpu_feature("neon");
arm_cpu_features.VFPv3 = check_arm_cpu_feature("vfpv3");
arm_cpu_features.TLS = check_arm_cpu_feature("tls");
arm_cpu_features.VFPv4 = check_arm_cpu_feature("vfpv4");
arm_cpu_features.IDIVa = check_arm_cpu_feature("idiva");
arm_cpu_features.IDIVt = check_arm_cpu_feature("idivt");
// Qualcomm Krait supports IDIVa but it doesn't report it. Check for krait.
if (get_arm_cpu_implementer() == 0x51 && get_arm_cpu_part() == 0x6F)
arm_cpu_features.IDIVa = arm_cpu_features.IDIVt = 1;
}
// Log the detected CPU features as one M64MSG_INFO message.
//
// The message is "ARM CPU Features:" followed by the names of all flags that
// are set in arm_cpu_features. The first name is preceded by a single space
// and every further name by ", ", so the list stays well formed even when
// SWP (or any other leading entry) is not set.
void print_arm_cpu_features(void)
{
    struct entry_t
    {
        unsigned char enabled; // current flag value
        const char* label;     // name shown in the log
    };

    const entry_t entries[] =
    {
        { arm_cpu_features.SWP,      "SWP"      },
        { arm_cpu_features.Half,     "Half"     },
        { arm_cpu_features.Thumb,    "Thumb"    },
        { arm_cpu_features.FastMult, "FastMult" },
        { arm_cpu_features.VFP,      "VFP"      },
        { arm_cpu_features.EDSP,     "EDSP"     },
        { arm_cpu_features.ThumbEE,  "ThumbEE"  },
        { arm_cpu_features.NEON,     "NEON"     },
        { arm_cpu_features.VFPv3,    "VFPv3"    },
        { arm_cpu_features.TLS,      "TLS"      },
        { arm_cpu_features.VFPv4,    "VFPv4"    },
        { arm_cpu_features.IDIVa,    "IDIVa"    },
        { arm_cpu_features.IDIVt,    "IDIVt"    },
    };

    std::string arm_cpu_features_string = "ARM CPU Features:";
    bool first = true;

    for (unsigned int i = 0; i < sizeof(entries) / sizeof(entries[0]); ++i)
    {
        if (!entries[i].enabled)
            continue;

        // Separator depends on position in the output, not on which flag it is.
        arm_cpu_features_string += first ? " " : ", ";
        arm_cpu_features_string += entries[i].label;
        first = false;
    }

    DebugMessage(M64MSG_INFO, "%s", arm_cpu_features_string.c_str());
}
@@ -0,0 +1,54 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Mupen64plus - arm_cpu_features.h *
* Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
* Copyright (C) 2015 Gilles Siberlin *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#ifndef ARM_CPU_FEATURES_H
#define ARM_CPU_FEATURES_H
typedef struct
{
unsigned char SWP;
unsigned char Half;
unsigned char Thumb;
unsigned char FastMult;
unsigned char VFP;
unsigned char EDSP;
unsigned char ThumbEE;
unsigned char NEON;
unsigned char VFPv3;
unsigned char TLS;
unsigned char VFPv4;
unsigned char IDIVa;
unsigned char IDIVt;
}arm_cpu_features_t;
/* Give the declarations below C linkage when this header is included from a
 * C++ translation unit, so C and C++ sources refer to the same symbols. */
#ifdef __cplusplus
extern "C" {
#endif
/* Global feature table; written by detect_arm_cpu_features(). */
extern arm_cpu_features_t arm_cpu_features;
/* Populate arm_cpu_features from the running system. */
void detect_arm_cpu_features(void);
/* Log the contents of arm_cpu_features. */
void print_arm_cpu_features(void);
#ifdef __cplusplus
}
#endif
#endif /* ARM_CPU_FEATURES_H */
@@ -2062,18 +2062,18 @@ static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
}
#ifdef HAVE_INTEGER_DIVIDE
/* Emit "sdiv rt,rs1,rs2" into the output stream.
 * rt is placed in bits 16-19, rs2 in bits 8-11 and rs1 in bits 0-3 of the
 * instruction word. Asserts that arm_cpu_features.IDIVa is set. */
static void emit_sdiv(u_int rs1,u_int rs2,u_int rt)
{
  const u_int encoding=0xe710f010|(rt<<16)|(rs2<<8)|rs1;
  assert(arm_cpu_features.IDIVa);
  assem_debug("sdiv %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
  output_w32(encoding);
}
/* Emit "udiv rt,rs1,rs2" into the output stream.
 * rt is placed in bits 16-19, rs2 in bits 8-11 and rs1 in bits 0-3 of the
 * instruction word. Asserts that arm_cpu_features.IDIVa is set. */
static void emit_udiv(u_int rs1,u_int rs2,u_int rt)
{
  const u_int encoding=0xe730f010|(rt<<16)|(rs2<<8)|rs1;
  assert(arm_cpu_features.IDIVa);
  assem_debug("udiv %s,%s,%s",regname[rt],regname[rs1],regname[rs2]);
  output_w32(encoding);
}
#endif
static void emit_clz(int rs,int rt)
{
@@ -4133,32 +4133,36 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs)
signed char remainder=get_reg(i_regs->regmap,HIREG);
assert(quotient>=0);
assert(remainder>=0);
#ifdef HAVE_INTEGER_DIVIDE
emit_test(d2,d2);
emit_jeq((int)out+16); // Division by zero
emit_sdiv(d1,d2,quotient);
emit_mul(quotient,d2,remainder);
emit_sub(d1,remainder,remainder);
#else
emit_movs(d1,remainder);
emit_negmi(remainder,remainder);
emit_movs(d2,HOST_TEMPREG);
emit_jeq((int)out+52); // Division by zero
emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
emit_clz(HOST_TEMPREG,quotient);
emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
emit_orimm(quotient,1<<31,quotient);
emit_shr(quotient,quotient,quotient);
emit_cmp(remainder,HOST_TEMPREG);
emit_subcs(remainder,HOST_TEMPREG,remainder);
emit_adcs(quotient,quotient,quotient);
emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
emit_jcc((int)out-16); // -4
emit_teq(d1,d2);
emit_negmi(quotient,quotient);
emit_test(d1,d1);
emit_negmi(remainder,remainder);
#endif
if(arm_cpu_features.IDIVa)
{
emit_test(d2,d2);
emit_jeq((int)out+16); // Division by zero
emit_sdiv(d1,d2,quotient);
emit_mul(quotient,d2,remainder);
emit_sub(d1,remainder,remainder);
}
else
{
emit_movs(d1,remainder);
emit_negmi(remainder,remainder);
emit_movs(d2,HOST_TEMPREG);
emit_jeq((int)out+52); // Division by zero
emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
emit_clz(HOST_TEMPREG,quotient);
emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
emit_orimm(quotient,1<<31,quotient);
emit_shr(quotient,quotient,quotient);
emit_cmp(remainder,HOST_TEMPREG);
emit_subcs(remainder,HOST_TEMPREG,remainder);
emit_adcs(quotient,quotient,quotient);
emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
emit_jcc((int)out-16); // -4
emit_teq(d1,d2);
emit_negmi(quotient,quotient);
emit_test(d1,d1);
emit_negmi(remainder,remainder);
}
}
if(opcode2[i]==0x1B) // DIVU
{
@@ -4171,24 +4175,28 @@ static void multdiv_assemble_arm(int i,struct regstat *i_regs)
assert(quotient>=0);
assert(remainder>=0);
emit_test(d2,d2);
#ifdef HAVE_INTEGER_DIVIDE
emit_jeq((int)out+16); // Division by zero
emit_udiv(d1,d2,quotient);
emit_mul(quotient,d2,remainder);
emit_sub(d1,remainder,remainder);
#else
emit_jeq((int)out+44); // Division by zero
emit_clz(d2,HOST_TEMPREG);
emit_movimm(1<<31,quotient);
emit_shl(d2,HOST_TEMPREG,d2);
emit_mov(d1,remainder);
emit_shr(quotient,HOST_TEMPREG,quotient);
emit_cmp(remainder,d2);
emit_subcs(remainder,d2,remainder);
emit_adcs(quotient,quotient,quotient);
emit_shrcc_imm(d2,1,d2);
emit_jcc((int)out-16); // -4
#endif
if(arm_cpu_features.IDIVa)
{
emit_jeq((int)out+16); // Division by zero
emit_udiv(d1,d2,quotient);
emit_mul(quotient,d2,remainder);
emit_sub(d1,remainder,remainder);
}
else
{
emit_jeq((int)out+44); // Division by zero
emit_clz(d2,HOST_TEMPREG);
emit_movimm(1<<31,quotient);
emit_shl(d2,HOST_TEMPREG,d2);
emit_mov(d1,remainder);
emit_shr(quotient,HOST_TEMPREG,quotient);
emit_cmp(remainder,d2);
emit_subcs(remainder,d2,remainder);
emit_adcs(quotient,quotient,quotient);
emit_shrcc_imm(d2,1,d2);
emit_jcc((int)out-16); // -4
}
}
}
else // 64-bit
@@ -4568,6 +4576,10 @@ static void do_clear_cache()
// CPU-architecture-specific initialization
static void arch_init() {
detect_arm_cpu_features();
print_arm_cpu_features();
rounding_modes[0]=0x0<<22; // round
rounding_modes[1]=0x3<<22; // trunc
rounding_modes[2]=0x1<<22; // ceil
@@ -13,7 +13,6 @@
//#define REG_PREFETCH 1
#define HAVE_CONDITIONAL_CALL 1
#define RAM_OFFSET 1
#define HAVE_INTEGER_DIVIDE 1
/* ARM calling convention:
r0-r3, r12: caller-save
@@ -44,6 +44,7 @@
#if NEW_DYNAREC == NEW_DYNAREC_X86
#include "assem_x86.h"
#elif NEW_DYNAREC == NEW_DYNAREC_ARM
#include "arm_cpu_features.h"
#include "assem_arm.h"
#else
#error Unsupported dynarec architecture

0 comments on commit e250b48

Please sign in to comment.