Skip to content

Commit e08b47e

Browse files
committed
Make it possible to lazily deserialize DefNode in Loader.java
* Benchmark: `TRUFFLERUBY_METRICS_REPS=5 jt metrics time --experimental-options -e0`. Results for parsing-core: before: 0.097 0.099 0.092 0.096; after: 0.061 0.063 0.066 0.059.
* Remove extra trailing spaces by using `<%-#`.
1 parent c8fd89e commit e08b47e

File tree

3 files changed

+69
-14
lines changed

3 files changed

+69
-14
lines changed

templates/java/org/ruby_lang/prism/Loader.java.erb

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import java.util.Locale;
1616
public class Loader {
1717

1818
public static ParseResult load(byte[] serialized, byte[] sourceBytes) {
19-
return new Loader(serialized, sourceBytes).load();
19+
return new Loader(serialized).load(sourceBytes);
2020
}
2121

2222
// Overridable methods
@@ -71,19 +71,21 @@ public class Loader {
7171
}
7272

7373
private final ByteBuffer buffer;
74-
private final Nodes.Source source;
7574
protected String encodingName;
7675
<%- if string_type == "String" -%>
7776
private Charset encodingCharset;
7877
<%- end -%>
7978
private ConstantPool constantPool;
8079

81-
protected Loader(byte[] serialized, byte[] sourceBytes) {
80+
protected Loader(byte[] serialized) {
8281
this.buffer = ByteBuffer.wrap(serialized).order(ByteOrder.nativeOrder());
83-
this.source = new Nodes.Source(sourceBytes);
8482
}
8583

86-
protected ParseResult load() {
84+
// We pass sourceBytes here and not in the constructor to avoid keeping
85+
// the sourceBytes in memory unnecessarily with lazy DefNodes, which hold on to the Loader.
86+
protected ParseResult load(byte[] sourceBytes) {
87+
Nodes.Source source = new Nodes.Source(sourceBytes);
88+
8789
expect((byte) 'P', "incorrect prism header");
8890
expect((byte) 'R', "incorrect prism header");
8991
expect((byte) 'I', "incorrect prism header");
@@ -331,6 +333,10 @@ public class Loader {
331333
return negative ? result.negate() : result;
332334
}
333335

336+
<%-
337+
base_params = [*("nodeId" if Prism::Template::INCLUDE_NODE_ID), "startOffset", "length"]
338+
base_params_sig = base_params.map { "int #{_1}" }.join(", ")
339+
-%>
334340
private Nodes.Node loadNode() {
335341
int type = buffer.get() & 0xFF;
336342
<%- if Prism::Template::INCLUDE_NODE_ID -%>
@@ -347,7 +353,7 @@ public class Loader {
347353
params = []
348354
params << "nodeId" if Prism::Template::INCLUDE_NODE_ID
349355
params << "startOffset" << "length"
350-
params << "buffer.getInt()" if node.needs_serialized_length?
356+
params << "buffer.getInt()" << "null" if node.needs_serialized_length?
351357
params << "loadFlags()" if node.flags
352358
params.concat node.semantic_fields.map { |field|
353359
case field
@@ -370,13 +376,44 @@ public class Loader {
370376
else raise
371377
end
372378
}
379+
$DefNode_params = params if node.name == "DefNode"
373380
-%>
381+
<%- if node.name == "DefNode" -%>
382+
return loadDefNode(<%= base_params.join(", ") -%>);
383+
<%- else -%>
374384
return new Nodes.<%= node.name %>(<%= params.join(", ") -%>);
385+
<%- end -%>
375386
<%- end -%>
376387
default:
377388
throw new Error("Unknown node type: " + type);
378389
}
379390
}
391+
392+
// Subclasses can override this to call createLazyDefNode instead of createDefNode
393+
protected Nodes.DefNode loadDefNode(<%= base_params_sig -%>) {
394+
return createDefNode(<%= base_params.join(", ") -%>);
395+
}
396+
397+
protected Nodes.DefNode createLazyDefNode(<%= base_params_sig -%>) {
398+
int bufferPosition = buffer.position();
399+
int serializedLength = buffer.getInt();
400+
// Load everything except the body and locals, because the name, receiver, and parameters are still needed to lazily define the method
401+
Nodes.DefNode lazyDefNode = new Nodes.DefNode(<%= base_params.join(", ") -%>, -bufferPosition, this, loadConstant(), loadOptionalNode(), (Nodes.ParametersNode) loadOptionalNode(), null, Nodes.EMPTY_STRING_ARRAY);
402+
buffer.position(bufferPosition + serializedLength); // skip past the serialized DefNode
403+
return lazyDefNode;
404+
}
405+
406+
protected Nodes.DefNode createDefNode(<%= base_params_sig -%>) {
407+
return new Nodes.DefNode(<%= $DefNode_params.join(", ") -%>);
408+
}
409+
410+
Nodes.DefNode createDefNodeFromSavedPosition(<%= base_params_sig -%>, int bufferPosition) {
411+
// This method mutates the buffer position and may be called from different threads, so we must synchronize
412+
synchronized (this) {
413+
buffer.position(bufferPosition);
414+
return createDefNode(<%= base_params.join(", ") -%>);
415+
}
416+
}
380417
<%- array_types.uniq.each do |type| -%>
381418

382419
private static final Nodes.<%= type %>[] EMPTY_<%= type %>_ARRAY = {};

templates/java/org/ruby_lang/prism/Nodes.java.erb

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ public abstract class Nodes {
142142

143143
protected abstract String toString(String indent);
144144
}
145-
<%# FLAGS -%>
145+
<%-# FLAGS -%>
146146
<%- flags.each do |flag| -%>
147147
148148
/**
@@ -194,7 +194,7 @@ public abstract class Nodes {
194194
<%- end -%>
195195
}
196196
<%- end -%>
197-
<%# NODES -%>
197+
<%-# NODES -%>
198198
<%- nodes.each do |node| -%>
199199
200200
/**
@@ -207,6 +207,7 @@ public abstract class Nodes {
207207
public static final class <%= node.name -%> extends Node {
208208
<%- if node.needs_serialized_length? -%>
209209
public final int serializedLength;
210+
public final Loader loader;
210211
<%- end -%>
211212
<%- if node.flags -%>
212213
public final short flags;
@@ -234,7 +235,10 @@ public abstract class Nodes {
234235
params = []
235236
params << "int nodeId" if Prism::Template::INCLUDE_NODE_ID
236237
params << "int startOffset" << "int length"
237-
params << "int serializedLength" if node.needs_serialized_length?
238+
if node.needs_serialized_length?
239+
params << "int serializedLength"
240+
params << "Loader loader"
241+
end
238242
params << "short flags" if node.flags
239243
params.concat(node.semantic_fields.map { |field| "#{field.java_type} #{field.name}" })
240244
-%>
@@ -246,6 +250,7 @@ public abstract class Nodes {
246250
<%- end -%>
247251
<%- if node.needs_serialized_length? -%>
248252
this.serializedLength = serializedLength;
253+
this.loader = loader;
249254
<%- end -%>
250255
<%- if node.flags -%>
251256
this.flags = flags;
@@ -254,7 +259,22 @@ public abstract class Nodes {
254259
this.<%= field.name %> = <%= field.name %>;
255260
<%- end -%>
256261
}
257-
<%# methods for flags -%>
262+
<%-# extra methods for DefNode -%>
263+
<%- if node.needs_serialized_length? -%>
264+
265+
public boolean isLazy() {
266+
return serializedLength < 0;
267+
}
268+
269+
public <%= node.name -%> getNonLazy() {
270+
if (isLazy()) {
271+
return loader.createDefNodeFromSavedPosition(<%= "nodeId, " if Prism::Template::INCLUDE_NODE_ID %>startOffset, length, -serializedLength);
272+
} else {
273+
return this;
274+
}
275+
}
276+
<%- end -%>
277+
<%-# methods for flags -%>
258278
<%- if (node_flags = node.flags) -%>
259279
<%- node_flags.values.each do |value| -%>
260280
@@ -263,7 +283,7 @@ public abstract class Nodes {
263283
}
264284
<%- end -%>
265285
<%- end -%>
266-
<%# potential override of setNewLineFlag() -%>
286+
<%-# potential override of setNewLineFlag() -%>
267287
<%- if node.newline == false -%>
268288
269289
@Override

templates/src/serialize.c.erb

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,6 @@ static void
5050
pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
5151
pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
5252

53-
size_t offset = buffer->length;
54-
5553
<%- if Prism::Template::INCLUDE_NODE_ID -%>
5654
pm_buffer_append_varuint(buffer, node->node_id);
5755
<%- end -%>
@@ -126,7 +124,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
126124
<%- end -%>
127125
<%- if node.needs_serialized_length? -%>
128126
// serialize length
129-
uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t));
127+
uint32_t length = pm_sizet_to_u32(buffer->length - length_offset);
130128
memcpy(buffer->value + length_offset, &length, sizeof(uint32_t));
131129
<%- end -%>
132130
break;

0 commit comments

Comments
 (0)