Skip to content

Commit

Permalink
Merge 8600541 into 5593987
Browse files Browse the repository at this point in the history
  • Loading branch information
alex committed Jul 14, 2013
2 parents 5593987 + 8600541 commit 2d10e15
Show file tree
Hide file tree
Showing 9 changed files with 387 additions and 198 deletions.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions docs/blog/index.rst
Expand Up @@ -6,3 +6,4 @@ Blog

announcing-topaz
one-year
type-specialized-instance-variables
54 changes: 54 additions & 0 deletions docs/blog/type-specialized-instance-variables.rst
@@ -0,0 +1,54 @@
Type Specialized Instance Variables
===================================

**Posted: July 13, 2013**

In Topaz, like most other VMs, all objects are stored in what are called
"boxes". Essentially that means when you have something like ``x = 5``, ``x``
is really a pointer to an object which contains ``5``, not the value ``5``
itself. This is often a source of performance problems for VMs, because it
generates more garbage for the GC to process and means that to access the value
``5`` more memory dereferences are needed. Topaz's just-in-time compiler (JIT)
is often able to remove these allocations and memory dereferences in individual
loops or functions; however, it's not able to remove them in structures that
stick around in memory, like objects.

Therefore, over the past week I've been working on an optimization for Topaz
called "type specialized instance variables". Basically what that means is that
Topaz keeps track of what types instance variables in an object tend to have,
and then specializes the storage to remove the indirection for ``Fixnum`` and
``Float`` objects.

Let's look at an example:

.. sourcecode:: ruby

class Point
def initialize(x, y, z)
@x = x
@y = y
@z = z
end
end

p = Point.new(1, 2, 3)

Before this optimization, ``p`` looked like this in memory. Each box indicates
an 8-byte (on 64-bit systems) value, and arrows are pointers:

.. image:: images/type-specialized-instances-before.png

And after the optimization, it looks like this:

.. image:: images/type-specialized-instances-after.png

With this optimization landed, Topaz will use less memory and be faster for
programs that store ``Fixnum`` and ``Float`` objects in memory. If you're
interested in this type of optimization you can read about a `similar one in
PyPy for lists`_ that we're in the process of porting to Topaz.

We're looking forward to doing our first release soon. Until then, we hope
you'll test Topaz out with the `nightly builds`_ and give us feedback.

.. _`similar one in PyPy for lists`: http://morepypy.blogspot.com/2011/10/more-compact-lists-with-list-strategies.html
.. _`nightly builds`: http://topazruby.com/builds/
39 changes: 0 additions & 39 deletions tests/jit/test_basic.py
Expand Up @@ -33,45 +33,6 @@ def test_while_loop(self, topaz, tmpdir):
jump(p0, p1, p3, p4, p5, p6, p7, p10, i40, p20, p22, p28, descr=TargetToken(4310781936))
""")

# Runs a Ruby snippet that increments the instance variable @i inside a
# while loop and checks the JIT trace recorded for that loop.
def test_ivar_while_loop(self, topaz, tmpdir):
traces = self.run(topaz, tmpdir, """
@i = 0
while @i < 10000
@i += 1
end
""")
# Expected loop: the integer is read out of a W_FixnumObject
# (getfield_gc_pure on inst_intvalue); each iteration allocates a new
# object (new_with_vtable), writes the incremented integer into it and
# stores the resulting pointer into a pointer array (ArrayP descr).
self.assert_matches(traces[0].loop, """
label(p0, p1, p3, p4, p5, p6, p7, p10, p20, p41, p31, p26, descr=TargetToken(4310781936))
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
setfield_gc(p20, 23, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
guard_not_invalidated(descr=<Guard0x100ff1088>)
p43 = force_token()
i44 = getfield_gc_pure(p41, descr=<FieldS topaz.objects.intobject.W_FixnumObject.inst_intvalue 8>)
i45 = int_lt(i44, 10000)
guard_true(i45, descr=<Guard0x100ff1010>)
debug_merge_point(0, 0, '<main> at JUMP_IF_FALSE')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at DUP_TOP')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
p46 = force_token()
i47 = int_add(i44, 1)
debug_merge_point(0, 0, '<main> at STORE_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at DISCARD_TOP')
debug_merge_point(0, 0, '<main> at JUMP')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
setfield_gc(p20, 39, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
p48 = new_with_vtable(4300302336)
setfield_gc(p48, i47, descr=<FieldS topaz.objects.intobject.W_FixnumObject.inst_intvalue 8>)
setarrayitem_gc(p26, 0, p48, descr=<ArrayP 8>)
i49 = arraylen_gc(p26, descr=<ArrayP 8>)
jump(p0, p1, p3, p4, p5, p6, p7, p10, p20, p48, p31, p26, descr=TargetToken(4310781936))
""")

def test_constant_string(self, topaz, tmpdir):
traces = self.run(topaz, tmpdir, """
i = 0
Expand Down
74 changes: 74 additions & 0 deletions tests/jit/test_instance_vars.py
Expand Up @@ -59,3 +59,77 @@ def initialize
setfield_gc(p24, 58, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
jump(p0, p1, p3, p4, p5, p7, p10, i49, p19, p22, p24, p30, descr=TargetToken(4310782288))
""")

# Runs a Ruby snippet that increments the integer instance variable @i in a
# while loop and checks the JIT trace recorded for that loop.
def test_unboxed_int_storage(self, topaz, tmpdir):
traces = self.run(topaz, tmpdir, """
@i = 0
while @i < 10000
@i += 1
end
""")
# Expected loop: the value is carried as a float-typed variable (f39),
# reinterpreted as an integer with convert_float_bytes_to_longlong,
# incremented with int_add, converted back with
# convert_longlong_bytes_to_float and stored into a float array (ArrayF
# descr). The expected trace contains no new_with_vtable allocation.
self.assert_matches(traces[0].loop, """
label(p0, p1, p3, p4, p5, p6, p7, p10, f39, p20, p31, p26, descr=TargetToken(4310773744))
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
i42 = convert_float_bytes_to_longlong(f39)
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
setfield_gc(p20, 23, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
guard_not_invalidated(descr=<Guard0x100ff6bd8>)
p43 = force_token()
i44 = int_lt(i42, 10000)
guard_true(i44, descr=<Guard0x100ff6b60>)
debug_merge_point(0, 0, '<main> at JUMP_IF_FALSE')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at DUP_TOP')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
p45 = force_token()
i46 = int_add(i42, 1)
debug_merge_point(0, 0, '<main> at STORE_INSTANCE_VAR')
f47 = convert_longlong_bytes_to_float(i46)
debug_merge_point(0, 0, '<main> at DISCARD_TOP')
debug_merge_point(0, 0, '<main> at JUMP')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
setfield_gc(p20, 39, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
setarrayitem_gc(p26, 0, f47, descr=<ArrayF 8>)
i48 = arraylen_gc(p26, descr=<ArrayF 8>)
jump(p0, p1, p3, p4, p5, p6, p7, p10, f47, p20, p31, p26, descr=TargetToken(4310773744))
""")

# Runs a Ruby snippet that adds 1.0 to the float instance variable @data in
# a while loop and checks the JIT trace recorded for that loop.
def test_unboxed_float_storage(self, topaz, tmpdir):
traces = self.run(topaz, tmpdir, """
@data = 0.0
while @data < 10000.0
@data += 1.0
end
""")
# Expected loop: the value is carried as a float variable (f37), compared
# with float_lt, incremented with float_add and stored directly into a
# float array (ArrayF descr). The expected trace contains no
# new_with_vtable allocation and no bytes<->longlong conversions.
self.assert_matches(traces[0].loop, """
label(p0, p1, p3, p4, p5, p6, p7, p10, p20, f37, p30, p26, descr=TargetToken(4310773744))
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
setfield_gc(p20, 23, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
guard_not_invalidated(descr=<Guard0x100ff1d30>)
p40 = force_token()
i41 = float_lt(f37, 10000.000000)
guard_true(i41, descr=<Guard0x100ff1cb8>)
debug_merge_point(0, 0, '<main> at JUMP_IF_FALSE')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at DUP_TOP')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
p42 = force_token()
f43 = float_add(f37, 1.000000)
debug_merge_point(0, 0, '<main> at STORE_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at DISCARD_TOP')
debug_merge_point(0, 0, '<main> at JUMP')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
setfield_gc(p20, 39, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
setarrayitem_gc(p26, 0, f43, descr=<ArrayF 8>)
i44 = arraylen_gc(p26, descr=<ArrayF 8>)
jump(p0, p1, p3, p4, p5, p6, p7, p10, p20, f43, p30, p26, descr=TargetToken(4310773744))
""")
32 changes: 17 additions & 15 deletions tests/test_mapdict.py
@@ -1,39 +1,41 @@
import pytest

from topaz.mapdict import ClassNode

from .base import BaseTopazTest
from topaz import mapdict


class FakeObject(object):
storage = None

def __init__(self, map):
self.map = map
self.object_storage = self.unboxed_storage = None


class TestMapDict(BaseTopazTest):
class TestMapDict(object):
@pytest.mark.parametrize("i", range(10))
def test_simple_size_estimation(self, space, i):
class_node = ClassNode(i)
assert class_node.size_estimate() == 0
class_node = mapdict.ClassNode(i)
assert class_node.size_estimate.object_size_estimate() == 0
assert class_node.size_estimate.unboxed_size_estimate() == 0

for j in range(1000):
w_obj = FakeObject(class_node)
for a in "abcdefghij"[:i]:
w_obj.map.add_attr(space, w_obj, a)
assert class_node.size_estimate() == i
w_obj.map = w_obj.map.add(space, mapdict.ObjectAttributeNode, a, w_obj)
assert class_node.size_estimate.object_size_estimate() == i
assert class_node.size_estimate.unboxed_size_estimate() == 0

@pytest.mark.parametrize("i", range(1, 10))
def test_avg_size_estimation(self, space, i):
class_node = ClassNode(i)
assert class_node.size_estimate() == 0
class_node = mapdict.ClassNode(i)
assert class_node.size_estimate.object_size_estimate() == 0
assert class_node.size_estimate.unboxed_size_estimate() == 0

for j in range(1000):
w_obj = FakeObject(class_node)
for a in "abcdefghij"[:i]:
w_obj.map.add_attr(space, w_obj, a)
w_obj.map = w_obj.map.add(space, mapdict.ObjectAttributeNode, a, w_obj)
w_obj = FakeObject(class_node)
for a in "klmnopqars":
w_obj.map.add_attr(space, w_obj, a)
assert class_node.size_estimate() in [(i + 10) // 2, (i + 11) // 2]
w_obj.map = w_obj.map.add(space, mapdict.ObjectAttributeNode, a, w_obj)

assert class_node.size_estimate.object_size_estimate() in [(i + 10) // 2, (i + 11) // 2]
assert class_node.size_estimate.unboxed_size_estimate() == 0

0 comments on commit 2d10e15

Please sign in to comment.