Permalink
Browse files

Allow optimizing compiler to compute Math.log using untagged doubles.

  • Loading branch information...
1 parent 8f89006 commit 1a008f28d5470136c1e6e9c3497c14a4cdb20a4c whesse@chromium.org committed Dec 13, 2010
View
3 src/arm/lithium-arm.cc
@@ -1354,6 +1354,9 @@ LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) {
case kMathPowHalf:
Abort("MathPowHalf LUnaryMathOperation not implemented");
return NULL;
+ case kMathLog:
+ Abort("MathLog LUnaryMathOperation not implemented");
+ return NULL;
default:
UNREACHABLE();
return NULL;
View
1 src/code-stubs.h
@@ -56,6 +56,7 @@ namespace internal {
V(FastNewContext) \
V(FastCloneShallowArray) \
V(TranscendentalCache) \
+ V(TranscendentalCacheSSE2) \
V(GenericUnaryOp) \
V(RevertToNumber) \
V(ToBoolean) \
View
3 src/heap.h
@@ -2054,8 +2054,9 @@ class TranscendentalCache {
// Allow access to the caches_ array as an ExternalReference.
friend class ExternalReference;
- // Inline implementation of the caching.
+ // Inline implementation of the cache.
friend class TranscendentalCacheStub;
+ friend class TranscendentalCacheSSE2Stub;
static TranscendentalCache* caches_[kNumberOfCaches];
Element elements_[kCacheSize];
View
2 src/hydrogen-instructions.h
@@ -1380,6 +1380,7 @@ class HUnaryMathOperation: public HUnaryOperation {
break;
case kMathSqrt:
case kMathPowHalf:
+ case kMathLog:
default:
set_representation(Representation::Double());
}
@@ -1399,6 +1400,7 @@ class HUnaryMathOperation: public HUnaryOperation {
case kMathCeil:
case kMathSqrt:
case kMathPowHalf:
+ case kMathLog:
return Representation::Double();
break;
case kMathAbs:
View
1 src/hydrogen.cc
@@ -4081,6 +4081,7 @@ bool HGraphBuilder::TryMathFunctionInline(Call* expr) {
case kMathFloor:
case kMathAbs:
case kMathSqrt:
+ case kMathLog:
if (argument_count == 2) {
HValue* argument = Pop();
Drop(1); // Receiver.
View
41 src/ia32/assembler-ia32.cc
@@ -2409,6 +2409,7 @@ void Assembler::movsd(XMMRegister dst, const Operand& src) {
emit_sse_operand(dst, src);
}
+
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
@@ -2431,6 +2432,17 @@ void Assembler::movd(XMMRegister dst, const Operand& src) {
}
+void Assembler::movd(const Operand& dst, XMMRegister src) {  // Store form of movd: XMM register to operand.
+ ASSERT(CpuFeatures::IsEnabled(SSE2));
+ EnsureSpace ensure_space(this);
+ last_pc_ = pc_;
+ EMIT(0x66);
+ EMIT(0x0F);
+ EMIT(0x7E);  // Opcode byte; the disassembler decodes 66 0F 7E as "movd <operand>,<xmm>".
+ emit_sse_operand(src, dst);  // The XMM source is passed first, the destination operand second.
+}
+
+
void Assembler::pand(XMMRegister dst, XMMRegister src) {
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
@@ -2465,15 +2477,40 @@ void Assembler::ptest(XMMRegister dst, XMMRegister src) {
}
-void Assembler::psllq(XMMRegister reg, int8_t imm8) {
+void Assembler::psllq(XMMRegister reg, int8_t shift) {  // Emits psllq reg, shift; this change only renames imm8 to shift.
ASSERT(CpuFeatures::IsEnabled(SSE2));
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0x66);
EMIT(0x0F);
EMIT(0x73);
emit_sse_operand(esi, reg); // esi == 6; presumably esi only supplies the value 6 for the first operand slot and is not itself accessed.
- EMIT(imm8);
+ EMIT(shift);  // Trailing immediate byte: the shift count.
+}
+
+
+void Assembler::pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle) {  // Emits pshufd dst, src, shuffle (66 0F 70 /r ib).
+ ASSERT(CpuFeatures::IsEnabled(SSE2));
+ EnsureSpace ensure_space(this);
+ last_pc_ = pc_;
+ EMIT(0x66);
+ EMIT(0x0F);
+ EMIT(0x70);  // Opcode byte; the disassembler decodes 66 0F 70 as "pshufd".
+ emit_sse_operand(dst, src);
+ EMIT(shuffle);  // Trailing immediate byte: the shuffle control.
+}
+
+
+// Emits pextrd dst, src, offset (66 0F 3A 16 /r ib): extracts the 32-bit
+// lane of |src| selected by |offset| into |dst|.
+void Assembler::pextrd(const Operand& dst, XMMRegister src, int8_t offset) {
+  // pextrd is an SSE4.1 instruction, so checking for SSE2 is not sufficient:
+  // executing it on an SSE2-only CPU raises an invalid-opcode fault.
+  // NOTE(review): IsSupported is used instead of the IsEnabled form seen in
+  // the sibling emitters because no caller visibly enables SSE4_1 around
+  // this emitter; tighten to IsEnabled(SSE4_1) once callers do.
+  ASSERT(CpuFeatures::IsSupported(SSE4_1));
+  EnsureSpace ensure_space(this);
+  last_pc_ = pc_;
+  EMIT(0x66);
+  EMIT(0x0F);
+  EMIT(0x3A);
+  EMIT(0x16);
+  emit_sse_operand(src, dst);  // XMM source first, destination operand second.
+  EMIT(offset);  // Trailing immediate byte: the lane index.
}
View
5 src/ia32/assembler-ia32.h
@@ -905,13 +905,16 @@ class Assembler : public Malloced {
void movdbl(const Operand& dst, XMMRegister src);
void movd(XMMRegister dst, const Operand& src);
+ void movd(const Operand& dst, XMMRegister src);  // Store form: XMM register to operand (names match the definition).
void movsd(XMMRegister dst, XMMRegister src);
void pand(XMMRegister dst, XMMRegister src);
void pxor(XMMRegister dst, XMMRegister src);
void ptest(XMMRegister dst, XMMRegister src);
- void psllq(XMMRegister reg, int8_t imm8);
+ void psllq(XMMRegister reg, int8_t shift);
+ void pshufd(XMMRegister dst, XMMRegister src, int8_t shuffle);
+ void pextrd(const Operand& dst, XMMRegister src, int8_t offset);
// Parallel XMM operations.
void movntdqa(XMMRegister src, const Operand& dst);
View
131 src/ia32/code-stubs-ia32.cc
@@ -2683,6 +2683,137 @@ void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm) {
}
+// Generates the stub body: looks up the untagged double in xmm1 in the
+// transcendental cache selected by type_ and returns the cached result, or
+// computes it with GenerateOperation() (updating the cache when a heap number
+// can be allocated), or calls the runtime when the cache does not exist yet.
+// Uses eax, ebx, ecx, edx and edi as working registers.
+void TranscendentalCacheSSE2Stub::Generate(MacroAssembler* masm) {
+  // Input on stack:
+  //   esp[0]: return address.
+  // Input in registers:
+  //   xmm1: untagged double input argument.
+  // Output:
+  //   xmm1: untagged double result.
+  Label skip_cache;
+  Label call_runtime;
+
+  // Split the double in xmm1 into its two 32-bit halves.
+  // pextrd is an SSE4.1 instruction, so it may only be emitted when the CPU
+  // supports it; otherwise fall back to an SSE2-only pshufd/movd sequence.
+  if (CpuFeatures::IsSupported(SSE4_1)) {
+    CpuFeatures::Scope sse4_scope(SSE4_1);
+    __ pextrd(Operand(edx), xmm1, 0x1);  // copy xmm1[63..32] to edx.
+  } else {
+    // NOTE(review): xmm0 is used as a scratch register here - confirm that
+    // no caller of this stub keeps a live value in xmm0 across the call.
+    __ pshufd(xmm0, xmm1, 0x1);  // xmm0[31..0] = xmm1[63..32].
+    __ movd(Operand(edx), xmm0);
+  }
+  __ movd(Operand(ebx), xmm1);
+
+  // xmm1 = double value
+  // ebx = low 32 bits of double value
+  // edx = high 32 bits of double value
+  // Compute hash (the shifts are arithmetic):
+  //   h = (low ^ high); h ^= h >> 16; h ^= h >> 8; h = h & (cacheSize - 1);
+  __ mov(ecx, ebx);
+  __ xor_(ecx, Operand(edx));
+  __ mov(eax, ecx);
+  __ sar(eax, 16);
+  __ xor_(ecx, Operand(eax));
+  __ mov(eax, ecx);
+  __ sar(eax, 8);
+  __ xor_(ecx, Operand(eax));
+  ASSERT(IsPowerOf2(TranscendentalCache::kCacheSize));
+  __ and_(Operand(ecx), Immediate(TranscendentalCache::kCacheSize - 1));
+
+  // xmm1 = double value.
+  // ebx = low 32 bits of double value.
+  // edx = high 32 bits of double value.
+  // ecx = TranscendentalCache::hash(double value).
+  __ mov(eax,
+         Immediate(ExternalReference::transcendental_cache_array_address()));
+  // Eax points to cache array.
+  __ mov(eax, Operand(eax, type_ * sizeof(TranscendentalCache::caches_[0])));
+  // Eax points to the cache for the type type_.
+  // If NULL, the cache hasn't been initialized yet, so go through runtime.
+  __ test(eax, Operand(eax));
+  __ j(zero, &call_runtime);
+#ifdef DEBUG
+  // Check that the layout of cache elements match expectations.
+  { TranscendentalCache::Element test_elem[2];
+    char* elem_start = reinterpret_cast<char*>(&test_elem[0]);
+    char* elem2_start = reinterpret_cast<char*>(&test_elem[1]);
+    char* elem_in0 = reinterpret_cast<char*>(&(test_elem[0].in[0]));
+    char* elem_in1 = reinterpret_cast<char*>(&(test_elem[0].in[1]));
+    char* elem_out = reinterpret_cast<char*>(&(test_elem[0].output));
+    CHECK_EQ(12, elem2_start - elem_start);  // Two uint_32's and a pointer.
+    CHECK_EQ(0, elem_in0 - elem_start);
+    CHECK_EQ(kIntSize, elem_in1 - elem_start);
+    CHECK_EQ(2 * kIntSize, elem_out - elem_start);
+  }
+#endif
+  // Find the address of the ecx'th entry in the cache, i.e., &eax[ecx*12].
+  __ lea(ecx, Operand(ecx, ecx, times_2, 0));  // ecx = 3 * index.
+  __ lea(ecx, Operand(eax, ecx, times_4, 0));  // ecx = eax + 12 * index.
+  // Check if cache matches: Double value is stored in uint32_t[2] array.
+  NearLabel cache_miss;
+  __ cmp(ebx, Operand(ecx, 0));
+  __ j(not_equal, &cache_miss);
+  __ cmp(edx, Operand(ecx, kIntSize));
+  __ j(not_equal, &cache_miss);
+  // Cache hit!  Unbox the cached heap number into xmm1.
+  __ mov(eax, Operand(ecx, 2 * kIntSize));
+  __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));
+  __ Ret();
+
+  __ bind(&cache_miss);
+  // Update cache with new value.
+  // We are short on registers, so use no_reg as scratch.
+  // This gives slightly larger code.
+  __ AllocateHeapNumber(eax, edi, no_reg, &skip_cache);
+  // Move the input from xmm1 to the FPU stack through a temporary stack slot.
+  __ sub(Operand(esp), Immediate(sizeof(double)));
+  __ movdbl(Operand(esp, 0), xmm1);
+  __ fld_d(Operand(esp, 0));
+  __ add(Operand(esp), Immediate(sizeof(double)));
+  GenerateOperation(masm);
+  // Record the input bits and the result heap number in the cache entry.
+  __ mov(Operand(ecx, 0), ebx);
+  __ mov(Operand(ecx, kIntSize), edx);
+  __ mov(Operand(ecx, 2 * kIntSize), eax);
+  __ fstp_d(FieldOperand(eax, HeapNumber::kValueOffset));
+  __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));
+  __ Ret();
+
+  // Heap number allocation failed: compute the result without caching it,
+  // passing the value through a temporary stack slot of one double.
+  __ bind(&skip_cache);
+  __ sub(Operand(esp), Immediate(sizeof(double)));
+  __ movdbl(Operand(esp, 0), xmm1);
+  __ fld_d(Operand(esp, 0));
+  GenerateOperation(masm);
+  __ fstp_d(Operand(esp, 0));
+  __ movdbl(xmm1, Operand(esp, 0));
+  __ add(Operand(esp), Immediate(sizeof(double)));
+  __ Ret();
+
+  // The cache for type_ does not exist yet: box the input and let the
+  // runtime function compute the result.
+  // NOTE(review): the runtime call is made without any visible frame setup
+  // and with the argument pushed on top of the return address - confirm this
+  // is safe for a stub that is called from optimized code.
+  __ bind(&call_runtime);
+  __ AllocateHeapNumber(eax, edi, no_reg, &skip_cache);
+  __ push(eax);
+  __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm1);
+  __ CallRuntime(RuntimeFunction(), 1);
+  __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));
+  __ Ret();
+}
+
+
+Runtime::FunctionId TranscendentalCacheSSE2Stub::RuntimeFunction() {  // Maps type_ to the runtime function that Generate() passes to CallRuntime.
+ switch (type_) {
+ // Add more cases when necessary.
+ case TranscendentalCache::LOG: return Runtime::kMath_log;
+ default:  // Every cache type other than LOG is unimplemented in this stub.
+ UNIMPLEMENTED();
+ return Runtime::kAbort;
+ }
+}
+
+
+void TranscendentalCacheSSE2Stub::GenerateOperation(MacroAssembler* masm) {  // Emits the x87 sequence that replaces the value on the FP stack with its natural logarithm.
+ // Only free register is edi.
+ // Input value is on FP stack and in xmm1.
+
+ ASSERT(type_ == TranscendentalCache::LOG);  // LOG is the only operation emitted here.
+ __ fldln2();  // Push ln(2): ST(0) = ln(2), ST(1) = x.
+ __ fxch();  // Swap: ST(0) = x, ST(1) = ln(2).
+ __ fyl2x();  // ST(1) * log2(ST(0)), then pop: ln(2) * log2(x) = ln(x).
+}
+
+
// Get the integer part of a heap number. Surprisingly, all this bit twiddling
// is faster than using the built-in instructions on floating point registers.
// Trashes edi and ebx. Dest is ecx. Source cannot be ecx or one of the
View
19 src/ia32/code-stubs-ia32.h
@@ -45,13 +45,32 @@ class TranscendentalCacheStub: public CodeStub {
void Generate(MacroAssembler* masm);
private:
TranscendentalCache::Type type_;
+
Major MajorKey() { return TranscendentalCache; }
int MinorKey() { return type_; }
Runtime::FunctionId RuntimeFunction();
void GenerateOperation(MacroAssembler* masm);
};
+// Check the transcendental cache, or generate the result, using SSE2.
+// The argument and result will be in xmm1.
+// Only supports TranscendentalCache::LOG at this point.
+class TranscendentalCacheSSE2Stub: public CodeStub {
+ public:
+ explicit TranscendentalCacheSSE2Stub(TranscendentalCache::Type type)
+ : type_(type) {}
+ void Generate(MacroAssembler* masm);  // Emits the stub code.
+ private:
+ TranscendentalCache::Type type_;  // Which transcendental operation this stub computes.
+
+ Major MajorKey() { return TranscendentalCacheSSE2; }  // Uses the TranscendentalCacheSSE2 entry of the code stub list.
+ int MinorKey() { return type_; }  // The minor key is the cache type.
+ Runtime::FunctionId RuntimeFunction();  // Runtime function matching type_.
+ void GenerateOperation(MacroAssembler* masm);  // Emits the FPU computation for type_.
+};
+
+
class ToBooleanStub: public CodeStub {
public:
ToBooleanStub() { }
View
96 src/ia32/disasm-ia32.cc
@@ -1107,6 +1107,21 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
} else {
UnimplementedInstruction();
}
+ } else if (*data == 0x3A) {
+ data++;
+ if (*data == 0x16) {
+ data++;
+ int mod, regop, rm;
+ get_modrm(*data, &mod, &regop, &rm);
+ int8_t imm8 = static_cast<int8_t>(data[1]);
+ AppendToBuffer("pextrd %s,%s,%d",
+ NameOfXMMRegister(regop),
+ NameOfXMMRegister(rm),
+ static_cast<int>(imm8));
+ data += 2;
+ } else {
+ UnimplementedInstruction();
+ }
} else if (*data == 0x2E || *data == 0x2F) {
const char* mnem = (*data == 0x2E) ? "ucomisd" : "comisd";
data++;
@@ -1129,6 +1144,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfCPURegister(regop),
NameOfXMMRegister(rm));
data++;
+ } else if (*data == 0x54) {
+ data++;
+ int mod, regop, rm;
+ get_modrm(*data, &mod, &regop, &rm);
+ AppendToBuffer("andpd %s,%s",
+ NameOfXMMRegister(regop),
+ NameOfXMMRegister(rm));
+ data++;
} else if (*data == 0x57) {
data++;
int mod, regop, rm;
@@ -1149,13 +1172,47 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("movdqa %s,", NameOfXMMRegister(regop));
data += PrintRightOperand(data);
+ } else if (*data == 0x70) {
+ data++;
+ int mod, regop, rm;
+ get_modrm(*data, &mod, &regop, &rm);
+ int8_t imm8 = static_cast<int8_t>(data[1]);
+ AppendToBuffer("pshufd %s,%s,%d",
+ NameOfXMMRegister(regop),
+ NameOfXMMRegister(rm),
+ static_cast<int>(imm8));
+ data += 2;
+ } else if (*data == 0x73) {
+ data++;
+ int mod, regop, rm;
+ get_modrm(*data, &mod, &regop, &rm);
+ int8_t imm8 = static_cast<int8_t>(data[1]);
+ AppendToBuffer("psllq %s,%d",
+ NameOfXMMRegister(rm),
+ static_cast<int>(imm8));
+ data += 2;
} else if (*data == 0x7F) {
AppendToBuffer("movdqa ");
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
data += PrintRightOperand(data);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
+ } else if (*data == 0x7E) {
+ data++;
+ int mod, regop, rm;
+ get_modrm(*data, &mod, &regop, &rm);
+ AppendToBuffer("movd ");
+ data += PrintRightOperand(data);
+ AppendToBuffer(",%s", NameOfXMMRegister(regop));
+ } else if (*data == 0xDB) {
+ data++;
+ int mod, regop, rm;
+ get_modrm(*data, &mod, &regop, &rm);
+ AppendToBuffer("pand %s,%s",
+ NameOfXMMRegister(regop),
+ NameOfXMMRegister(rm));
+ data++;
} else if (*data == 0xE7) {
AppendToBuffer("movntdq ");
data++;
@@ -1164,38 +1221,13 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
data += PrintRightOperand(data);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
} else if (*data == 0xEF) {
- data++;
- int mod, regop, rm;
- get_modrm(*data, &mod, &regop, &rm);
- AppendToBuffer("pxor %s,%s",
- NameOfXMMRegister(regop),
- NameOfXMMRegister(rm));
- data++;
- } else if (*data == 0xDB) {
- data++;
- int mod, regop, rm;
- get_modrm(*data, &mod, &regop, &rm);
- AppendToBuffer("pand %s,%s",
- NameOfXMMRegister(regop),
- NameOfXMMRegister(rm));
- data++;
- } else if (*data == 0x73) {
- data++;
- int mod, regop, rm;
- get_modrm(*data, &mod, &regop, &rm);
- int8_t imm8 = static_cast<int8_t>(data[1]);
- AppendToBuffer("psllq %s,%d",
- NameOfXMMRegister(rm),
- static_cast<int>(imm8));
- data += 2;
- } else if (*data == 0x54) {
- data++;
- int mod, regop, rm;
- get_modrm(*data, &mod, &regop, &rm);
- AppendToBuffer("andpd %s,%s",
- NameOfXMMRegister(regop),
- NameOfXMMRegister(rm));
- data++;
+ data++;
+ int mod, regop, rm;
+ get_modrm(*data, &mod, &regop, &rm);
+ AppendToBuffer("pxor %s,%s",
+ NameOfXMMRegister(regop),
+ NameOfXMMRegister(rm));
+ data++;
} else {
UnimplementedInstruction();
}
View
11 src/ia32/lithium-codegen-ia32.cc
@@ -2235,6 +2235,13 @@ void LCodeGen::DoPower(LPower* instr) {
}
+void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {  // Emits Math.log as a call to TranscendentalCacheSSE2Stub.
+ ASSERT(ToDoubleRegister(instr->result()).is(xmm1));  // The stub takes its argument and returns its result in xmm1.
+ TranscendentalCacheSSE2Stub stub(TranscendentalCache::LOG);
+ CallCode(stub.GetCode(), RelocInfo::CODE_TARGET, instr);
+}
+
+
void LCodeGen::DoUnaryMathOperation(LUnaryMathOperation* instr) {
switch (instr->op()) {
case kMathAbs:
@@ -2252,6 +2259,10 @@ void LCodeGen::DoUnaryMathOperation(LUnaryMathOperation* instr) {
case kMathPowHalf:
DoMathPowHalf(instr);
break;
+ case kMathLog:
+ DoMathLog(instr);
+ break;
+
default:
UNREACHABLE();
}
View
1 src/ia32/lithium-codegen-ia32.h
@@ -176,6 +176,7 @@ class LCodeGen BASE_EMBEDDED {
void DoMathRound(LUnaryMathOperation* instr);
void DoMathSqrt(LUnaryMathOperation* instr);
void DoMathPowHalf(LUnaryMathOperation* instr);
+ void DoMathLog(LUnaryMathOperation* instr);
// Support for recording safepoint and position information.
void RecordSafepoint(LPointerMap* pointers, int deoptimization_index);
View
38 src/ia32/lithium-ia32.cc
@@ -1355,22 +1355,28 @@ LInstruction* LChunkBuilder::DoCallConstantFunction(
LInstruction* LChunkBuilder::DoUnaryMathOperation(HUnaryMathOperation* instr) {
MathFunctionId op = instr->op();
- LOperand* input = UseRegisterAtStart(instr->value());
- LInstruction* result = new LUnaryMathOperation(input);
- switch (op) {
- case kMathAbs:
- return AssignEnvironment(AssignPointerMap(DefineSameAsFirst(result)));
- case kMathFloor:
- return AssignEnvironment(DefineAsRegister(result));
- case kMathRound:
- return AssignEnvironment(DefineAsRegister(result));
- case kMathSqrt:
- return DefineSameAsFirst(result);
- case kMathPowHalf:
- return AssignEnvironment(DefineSameAsFirst(result));
- default:
- UNREACHABLE();
- return NULL;
+ if (op == kMathLog) {  // Math.log is lowered to a call, with input and result both fixed to xmm1.
+ LOperand* input = UseFixedDouble(instr->value(), xmm1);
+ LInstruction* result = new LUnaryMathOperation(input);
+ return MarkAsCall(DefineFixedDouble(result, xmm1), instr);
+ } else {  // All other operations keep the previous register-based handling, now nested one level deeper.
+ LOperand* input = UseRegisterAtStart(instr->value());
+ LInstruction* result = new LUnaryMathOperation(input);
+ switch (op) {
+ case kMathAbs:
+ return AssignEnvironment(AssignPointerMap(DefineSameAsFirst(result)));
+ case kMathFloor:
+ return AssignEnvironment(DefineAsRegister(result));
+ case kMathRound:
+ return AssignEnvironment(DefineAsRegister(result));
+ case kMathSqrt:
+ return DefineSameAsFirst(result);
+ case kMathPowHalf:
+ return AssignEnvironment(DefineSameAsFirst(result));
+ default:
+ UNREACHABLE();
+ return NULL;
+ }
}
}
View
1 src/math.js
@@ -265,6 +265,7 @@ function SetupMath() {
%SetMathFunctionId($Math.abs, 4);
%SetMathFunctionId($Math.sqrt, 0xd);
%SetMathFunctionId($Math.pow, 0xe);
+ %SetMathFunctionId($Math.log, 5);
// TODO(erikcorry): Set the id of the other functions so they can be
// optimized.
};

0 comments on commit 1a008f2

Please sign in to comment.