Skip to content
Permalink
Browse files

Merge df3c1e0 into 5593987

  • Loading branch information
alex committed Jul 14, 2013
2 parents 5593987 + df3c1e0 commit d929d22c44a4742d6702ae6d3e87e25f5328fc33
Binary file not shown.
Binary file not shown.
@@ -6,3 +6,4 @@ Blog

announcing-topaz
one-year
type-specialized-instance-variables
@@ -0,0 +1,53 @@
Type Specialized Instance Variables
===================================

**Posted: July 13, 2013**

In Topaz, like most other VMs, all objects are stored in what are called
"boxes". Essentially that means when you have something like ``x = 5``, ``x``
is really a pointer to an object which contains ``5``, not the value ``5``
itself. This is often a source of performance problems for VMs: it generates
more garbage for the GC to process, and it means that more memory dereferences
are needed to access the value ``5``. Topaz's just-in-time compiler (JIT) is
often able to remove these allocations and memory dereferences in individual
loops or functions; however, it's not able to remove them in structures that
stick around in memory, like objects.

Therefore, over the past week I've been working on an optimization for Topaz
called "type specialized instance variables". Basically what that means is that
Topaz keeps track of what types instance variables in an object tend to have,
and then specializes the storage to remove the indirection for ``Fixnum`` and
``Float`` objects.

Let's look at an example:

.. sourcecode:: ruby

    class Point
      def initialize(x, y, z)
        @x = x
        @y = y
        @z = z
      end
    end

    p = Point.new(1, 2, 3)

Before this optimization, ``p`` looked like this in memory:

.. image:: images/type-specialized-instances-before.png

And after the optimization, it looks like this:

.. image:: images/type-specialized-instances-after.png

With this optimization landed, Topaz will use less memory and be faster for
programs that store ``Fixnum`` and ``Float`` objects in memory. If you're
interested in this type of optimization you can read about a `similar one in
PyPy for lists`_ that we're in the process of porting to Topaz.

We're looking forward to doing our first release soon. Until then, we hope
you'll test Topaz out with the `nightly builds`_ and give us feedback.

.. _`similar one in PyPy for lists`: http://morepypy.blogspot.com/2011/10/more-compact-lists-with-list-strategies.html
.. _`nightly builds`: http://topazruby.com/builds/
@@ -33,45 +33,6 @@ def test_while_loop(self, topaz, tmpdir):
jump(p0, p1, p3, p4, p5, p6, p7, p10, i40, p20, p22, p28, descr=TargetToken(4310781936))
""")

def test_ivar_while_loop(self, topaz, tmpdir):
traces = self.run(topaz, tmpdir, """
@i = 0
while @i < 10000
@i += 1
end
""")
self.assert_matches(traces[0].loop, """
label(p0, p1, p3, p4, p5, p6, p7, p10, p20, p41, p31, p26, descr=TargetToken(4310781936))
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
setfield_gc(p20, 23, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
guard_not_invalidated(descr=<Guard0x100ff1088>)
p43 = force_token()
i44 = getfield_gc_pure(p41, descr=<FieldS topaz.objects.intobject.W_FixnumObject.inst_intvalue 8>)
i45 = int_lt(i44, 10000)
guard_true(i45, descr=<Guard0x100ff1010>)
debug_merge_point(0, 0, '<main> at JUMP_IF_FALSE')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at DUP_TOP')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
p46 = force_token()
i47 = int_add(i44, 1)
debug_merge_point(0, 0, '<main> at STORE_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at DISCARD_TOP')
debug_merge_point(0, 0, '<main> at JUMP')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
setfield_gc(p20, 39, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
p48 = new_with_vtable(4300302336)
setfield_gc(p48, i47, descr=<FieldS topaz.objects.intobject.W_FixnumObject.inst_intvalue 8>)
setarrayitem_gc(p26, 0, p48, descr=<ArrayP 8>)
i49 = arraylen_gc(p26, descr=<ArrayP 8>)
jump(p0, p1, p3, p4, p5, p6, p7, p10, p20, p48, p31, p26, descr=TargetToken(4310781936))
""")

def test_constant_string(self, topaz, tmpdir):
traces = self.run(topaz, tmpdir, """
i = 0
@@ -59,3 +59,77 @@ def initialize
setfield_gc(p24, 58, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
jump(p0, p1, p3, p4, p5, p7, p10, i49, p19, p22, p24, p30, descr=TargetToken(4310782288))
""")

def test_unboxed_int_storage(self, topaz, tmpdir):
traces = self.run(topaz, tmpdir, """
@i = 0
while @i < 10000
@i += 1
end
""")
self.assert_matches(traces[0].loop, """
label(p0, p1, p3, p4, p5, p6, p7, p10, f39, p20, p31, p26, descr=TargetToken(4310773744))
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
i42 = convert_float_bytes_to_longlong(f39)
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
setfield_gc(p20, 23, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
guard_not_invalidated(descr=<Guard0x100ff6bd8>)
p43 = force_token()
i44 = int_lt(i42, 10000)
guard_true(i44, descr=<Guard0x100ff6b60>)
debug_merge_point(0, 0, '<main> at JUMP_IF_FALSE')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at DUP_TOP')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
p45 = force_token()
i46 = int_add(i42, 1)
debug_merge_point(0, 0, '<main> at STORE_INSTANCE_VAR')
f47 = convert_longlong_bytes_to_float(i46)
debug_merge_point(0, 0, '<main> at DISCARD_TOP')
debug_merge_point(0, 0, '<main> at JUMP')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
setfield_gc(p20, 39, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
setarrayitem_gc(p26, 0, f47, descr=<ArrayF 8>)
i48 = arraylen_gc(p26, descr=<ArrayF 8>)
jump(p0, p1, p3, p4, p5, p6, p7, p10, f47, p20, p31, p26, descr=TargetToken(4310773744))
""")

def test_unboxed_float_storage(self, topaz, tmpdir):
traces = self.run(topaz, tmpdir, """
@data = 0.0
while @data < 10000.0
@data += 1.0
end
""")
self.assert_matches(traces[0].loop, """
label(p0, p1, p3, p4, p5, p6, p7, p10, p20, f37, p30, p26, descr=TargetToken(4310773744))
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
setfield_gc(p20, 23, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
guard_not_invalidated(descr=<Guard0x100ff1d30>)
p40 = force_token()
i41 = float_lt(f37, 10000.000000)
guard_true(i41, descr=<Guard0x100ff1cb8>)
debug_merge_point(0, 0, '<main> at JUMP_IF_FALSE')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
debug_merge_point(0, 0, '<main> at DUP_TOP')
debug_merge_point(0, 0, '<main> at LOAD_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at LOAD_CONST')
debug_merge_point(0, 0, '<main> at SEND')
p42 = force_token()
f43 = float_add(f37, 1.000000)
debug_merge_point(0, 0, '<main> at STORE_INSTANCE_VAR')
debug_merge_point(0, 0, '<main> at DISCARD_TOP')
debug_merge_point(0, 0, '<main> at JUMP')
debug_merge_point(0, 0, '<main> at LOAD_SELF')
setfield_gc(p20, 39, descr=<FieldS topaz.executioncontext.ExecutionContext.inst_last_instr 24>)
setarrayitem_gc(p26, 0, f43, descr=<ArrayF 8>)
i44 = arraylen_gc(p26, descr=<ArrayF 8>)
jump(p0, p1, p3, p4, p5, p6, p7, p10, p20, f43, p30, p26, descr=TargetToken(4310773744))
""")
@@ -1,39 +1,41 @@
import pytest

from topaz.mapdict import ClassNode

from .base import BaseTopazTest
from topaz import mapdict


class FakeObject(object):
storage = None

def __init__(self, map):
self.map = map
self.object_storage = self.unboxed_storage = None


class TestMapDict(BaseTopazTest):
class TestMapDict(object):
@pytest.mark.parametrize("i", range(10))
def test_simple_size_estimation(self, space, i):
class_node = ClassNode(i)
assert class_node.size_estimate() == 0
class_node = mapdict.ClassNode(i)
assert class_node.size_estimate.object_size_estimate() == 0
assert class_node.size_estimate.unboxed_size_estimate() == 0

for j in range(1000):
w_obj = FakeObject(class_node)
for a in "abcdefghij"[:i]:
w_obj.map.add_attr(space, w_obj, a)
assert class_node.size_estimate() == i
w_obj.map = w_obj.map.add(space, mapdict.ObjectAttributeNode, a, w_obj)
assert class_node.size_estimate.object_size_estimate() == i
assert class_node.size_estimate.unboxed_size_estimate() == 0

@pytest.mark.parametrize("i", range(1, 10))
def test_avg_size_estimation(self, space, i):
class_node = ClassNode(i)
assert class_node.size_estimate() == 0
class_node = mapdict.ClassNode(i)
assert class_node.size_estimate.object_size_estimate() == 0
assert class_node.size_estimate.unboxed_size_estimate() == 0

for j in range(1000):
w_obj = FakeObject(class_node)
for a in "abcdefghij"[:i]:
w_obj.map.add_attr(space, w_obj, a)
w_obj.map = w_obj.map.add(space, mapdict.ObjectAttributeNode, a, w_obj)
w_obj = FakeObject(class_node)
for a in "klmnopqars":
w_obj.map.add_attr(space, w_obj, a)
assert class_node.size_estimate() in [(i + 10) // 2, (i + 11) // 2]
w_obj.map = w_obj.map.add(space, mapdict.ObjectAttributeNode, a, w_obj)

assert class_node.size_estimate.object_size_estimate() in [(i + 10) // 2, (i + 11) // 2]
assert class_node.size_estimate.unboxed_size_estimate() == 0

0 comments on commit d929d22

Please sign in to comment.