Permalink
Browse files

Use a dedicated register to hold the memory base on amd64.

Since we don't have %fs, this saves some indirection while accessing
memory, and speeds things up measurably.

We should really just use one of the extra registers (%r8–%r15) we have
on amd64, but our assembler doesn't support the REX.R/X/B prefix bits yet.
  • Loading branch information...
1 parent 4aa11db commit 1f46955291ca287269d1c80732673570fc120991 @nelhage committed Aug 6, 2012
Showing with 24 additions and 31 deletions.
  1. +11 −24 bt.cpp
  2. +3 −2 bt_helper_64.S
  3. +10 −5 x86.h
View
35 bt.cpp
@@ -187,31 +187,19 @@ int bt_setup_cpu_segment(beta_cpu *cpu) {
#ifdef __x86_64__
#define BEMU_REG_IP REG_RIP
#define BEMU_REG_BP REG_RBP
+#define BEMU_REG_AX REG_RAX
#else
#define BEMU_REG_IP REG_EIP
#define BEMU_REG_BP REG_EBP
+#define BEMU_REG_AX REG_EAX
#endif
-#ifdef __x86_64__
-byteptr bt_decode_fault_addr(ucontext_t *uctx, beta_cpu *cpu, bdecode *insn) {
- switch (insn->opcode) {
- case OP_LD:
- case OP_ST:
- case OP_LDR:
- break;
- default:
- panic("Fault from a non-memory opcode?");
- break;
- }
- return (uint8_t*)uctx->uc_mcontext.gregs[REG_RAX] - (uint8_t*)cpu->memory;
-}
-#else
byteptr bt_decode_fault_addr(ucontext_t *uctx, beta_cpu *cpu, bdecode *insn) {
- uint8_t *eip = (uint8_t*)uctx->uc_mcontext.gregs[REG_EIP];
+ uint8_t *eip = (uint8_t*)uctx->uc_mcontext.gregs[BEMU_REG_IP];
switch (insn->opcode) {
case OP_LD:
case OP_ST:
- return uctx->uc_mcontext.gregs[REG_EAX];
+ return uctx->uc_mcontext.gregs[BEMU_REG_AX];
break;
case OP_LDR:
/* Skip the FS prefix, the opcode, and the modrm byte to find
@@ -223,7 +211,6 @@ byteptr bt_decode_fault_addr(ucontext_t *uctx, beta_cpu *cpu, bdecode *insn) {
break;
}
}
-#endif
void bt_segv(int signo UNUSED, siginfo_t *info, void *ctx) {
ucontext_t *uctx = (ucontext_t*)ctx;
@@ -345,7 +332,9 @@ void bt_load_reg(X86Assembler *cc, uint8_t breg, X86Register reg) {
switch (breg) {
case 31: cc->xor_(reg, reg); break;
case 0: cc->mov(X86ESI, reg); break;
+#ifndef __x86_64__
case 1: cc->mov(X86EDI, reg); break;
+#endif
case SP: cc->mov(X86EBX, reg); break;
case BP: cc->mov(X86EDX, reg); break;
default: cc->mov(bt_register_address(breg), reg);
@@ -357,7 +346,9 @@ void bt_store_reg(X86Assembler *cc, T val, uint8_t breg) {
switch (breg) {
case 31: break;
case 0: cc->mov(val, X86ESI); break;
+#ifndef __x86_64__
case 1: cc->mov(val, X86EDI); break;
+#endif
case SP: cc->mov(val, X86EBX); break;
case BP: cc->mov(val, X86EDX); break;
default: cc->mov(val, bt_register_address(breg));
@@ -510,9 +501,8 @@ inline void bt_translate_other(X86Assembler *buf, byteptr pc, bdecode *inst) {
buf->and_(~(PC_SUPERVISOR | 0x3), X86EAX);
#ifdef __x86_64__
- buf->add_(X86Mem(X86EBP, (uint32_t)offsetof(beta_cpu, memory)), X86RAX);
bt_save_fault_entry(buf, pc);
- buf->mov(X86ECX, X86Mem(X86RAX));
+ buf->mov(X86ECX, X86Mem(0, X86RDI, X86RAX));
#else
bt_save_fault_entry(buf, pc);
buf->byte(PREFIX_SEG_FS);
@@ -529,9 +519,8 @@ inline void bt_translate_other(X86Assembler *buf, byteptr pc, bdecode *inst) {
buf->and_(~(PC_SUPERVISOR | 0x3), X86EAX);
#ifdef __x86_64__
- buf->add_(X86Mem(X86EBP, (uint32_t)offsetof(beta_cpu, memory)), X86RAX);
bt_save_fault_entry(buf, pc);
- buf->mov(X86Mem(X86RAX), X86EAX);
+ buf->mov(X86Mem(0, X86RDI, X86RAX), X86EAX);
#else
bt_save_fault_entry(buf, pc);
buf->byte(PREFIX_SEG_FS);
@@ -543,10 +532,8 @@ inline void bt_translate_other(X86Assembler *buf, byteptr pc, bdecode *inst) {
break;
case OP_LDR:
#ifdef __x86_64__
- buf->mov(X86Mem(X86RBP, (uint32_t)offsetof(beta_cpu, memory)), X86RAX);
- buf->add_(((pc + 4 + 4*inst->imm) & ~(PC_SUPERVISOR|0x03)), X86RAX);
bt_save_fault_entry(buf, pc);
- buf->mov(X86Mem(X86EAX), X86EAX);
+ buf->mov(X86Mem(X86RDI, ((pc + 4 + 4*inst->imm) & ~(PC_SUPERVISOR|0x03))), X86EAX);
#else
bt_save_fault_entry(buf, pc);
buf->byte(PREFIX_SEG_FS);
View
@@ -23,7 +23,8 @@
mov (4*SP)(%rbp), %ebx
mov (4*BP)(%rbp), %edx
mov (4*0)(%rbp), %esi
- mov (4*1)(%rbp), %edi
+ mov (32*4 + 8)(%rbp), %rdi
+// mov (4*1)(%rbp), %edi
.endm
.macro SAVE_CPU pc=1
@@ -33,7 +34,7 @@
mov %ebx, (4*SP)(%rbp)
mov %edx, (4*BP)(%rbp)
mov %esi, (4*0)(%rbp)
- mov %edi, (4*1)(%rbp)
+// mov %edi, (4*1)(%rbp)
.endm
_(bt_interp_one):
View
15 x86.h
@@ -374,15 +374,20 @@ struct X86ReferenceSIB {
ASSERT(base.bits == HOST_BITS);
ASSERT(index.bits == HOST_BITS);
switch(scale) {
-#define S(n) case n: sv = SCALE_##n
+#define S(n) case n: sv = SCALE_##n; break
S(1); S(2); S(4); S(8);
#undef S
default:
panic("Illegal scale value: %d", scale);
}
- cc->modrm(MOD_INDIR_DISP32, reg.val, REG_SIB);
- cc->sib(sv, index.val, base.val);
- cc->word(offset);
+ if (offset) {
+ cc->modrm(MOD_INDIR_DISP32, reg.val, REG_SIB);
+ cc->sib(sv, index.val, base.val);
+ cc->word(offset);
+ } else {
+ cc->modrm(MOD_INDIR, reg.val, REG_SIB);
+ cc->sib(sv, index.val, base.val);
+ }
}
};
template<> struct is_modrm<X86ReferenceSIB> { const static true_type val; };
@@ -403,7 +408,7 @@ static inline X86ReferenceAbs X86Mem(uint32_t addr) {
X86ReferenceAbs r = {addr};
return r;
}
-static inline X86ReferenceSIB X86Mem(uint32_t off, X86Register base, X86Register index, uint8_t scale) {
+static inline X86ReferenceSIB X86Mem(uint32_t off, X86Register base, X86Register index, uint8_t scale = 1) {
X86ReferenceSIB r = {off, base, index, scale};
return r;
}

0 comments on commit 1f46955

Please sign in to comment.