Skip to content

Commit 90d8623

Browse files
committed
Rename varint as varuint
Line numbers may be negative, so we need to introduce a signed varint; renaming the unsigned ones first avoids confusion.
1 parent fda510b commit 90d8623

File tree

7 files changed

+88
-88
lines changed

7 files changed

+88
-88
lines changed

docs/serialization.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ The syntax tree still requires a copy of the original source, as for the most pa
99

1010
Let us define some simple types for readability.
1111

12-
### varint
12+
### varuint
1313

1414
A variable-length unsigned integer whose value fits in `uint32_t`, encoded in 1 to 5 bytes using the [LEB128](https://en.wikipedia.org/wiki/LEB128) encoding.
1515
This drastically cuts down on the size of the serialized string, especially when the source file is large.
@@ -18,15 +18,15 @@ This drastically cuts down on the size of the serialized string, especially when
1818

1919
| # bytes | field |
2020
| --- | --- |
21-
| varint | the length of the string in bytes |
21+
| varuint | the length of the string in bytes |
2222
| ... | the string bytes |
2323

2424
### location
2525

2626
| # bytes | field |
2727
| --- | --- |
28-
| varint | byte offset into the source string where this location begins |
29-
| varint | length of the location in bytes in the source string |
28+
| varuint | byte offset into the source string where this location begins |
29+
| varuint | length of the location in bytes in the source string |
3030

3131
### comment
3232

@@ -71,18 +71,18 @@ The header is structured like the following table:
7171
| `1` | patch version number |
7272
| `1` | 1 indicates only semantics fields were serialized, 0 indicates all fields were serialized (including location fields) |
7373
| string | the encoding name |
74-
| varint | the start line |
75-
| varint | number of comments |
74+
| varuint | the start line |
75+
| varuint | number of comments |
7676
| comment* | comments |
77-
| varint | number of magic comments |
77+
| varuint | number of magic comments |
7878
| magic comment* | magic comments |
7979
| location? | the optional location of the `__END__` keyword and its contents |
80-
| varint | number of errors |
80+
| varuint | number of errors |
8181
| diagnostic* | errors |
82-
| varint | number of warnings |
82+
| varuint | number of warnings |
8383
| diagnostic* | warnings |
8484
| `4` | content pool offset |
85-
| varint | content pool size |
85+
| varuint | content pool size |
8686

8787
After the header comes the body of the serialized string.
8888
The body consists of a sequence of nodes that is built using a prefix traversal order of the syntax tree.
@@ -159,7 +159,7 @@ serialize(const uint8_t *source, size_t length) {
159159
}
160160
```
161161

162-
The final argument to `pm_serialize_parse` is an optional string that controls the options to the parse function. This includes all of the normal options that could be passed to `pm_parser_init` through a `pm_options_t` struct, but serialized as a string to make it easier for callers through FFI. Note that no `varint` are used here to make it easier to produce the data for the caller, and also serialized size is less important here. The format of the data is structured as follows:
162+
The final argument to `pm_serialize_parse` is an optional string that controls the options to the parse function. This includes all of the normal options that could be passed to `pm_parser_init` through a `pm_options_t` struct, but serialized as a string to make it easier for callers through FFI. Note that no `varuint` are used here to make it easier to produce the data for the caller, and also serialized size is less important here. The format of the data is structured as follows:
163163

164164
| # bytes | field |
165165
| ------- | -------------------------- |

include/prism/util/pm_buffer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
118118
* @param buffer The buffer to append to.
119119
* @param value The integer to append.
120120
*/
121-
void pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value);
121+
void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
122122

123123
/**
124124
* Concatenate one buffer onto another.

src/prism.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17268,7 +17268,7 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
1726817268
pm_node_t *node = pm_parse(&parser);
1726917269
pm_serialize_header(buffer);
1727017270
pm_serialize_encoding(&parser.encoding, buffer);
17271-
pm_buffer_append_varint(buffer, parser.start_line);
17271+
pm_buffer_append_varuint(buffer, parser.start_line);
1727217272
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
1727317273

1727417274
pm_node_destroy(&parser, node);

src/util/pm_buffer.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value) {
138138
* Append a 32-bit unsigned integer to the buffer as a variable-length integer.
139139
*/
140140
void
141-
pm_buffer_append_varint(pm_buffer_t *buffer, uint32_t value) {
141+
pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) {
142142
if (value < 128) {
143143
pm_buffer_append_byte(buffer, (uint8_t) value);
144144
} else {

templates/java/org/prism/Loader.java.erb

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -104,23 +104,23 @@ public class Loader {
104104
expect((byte) 1, "Loader.java requires no location fields in the serialized output");
105105

106106
// This loads the name of the encoding.
107-
int encodingLength = loadVarInt();
107+
int encodingLength = loadVarUInt();
108108
byte[] encodingNameBytes = new byte[encodingLength];
109109
buffer.get(encodingNameBytes);
110110
this.encodingName = new String(encodingNameBytes, StandardCharsets.US_ASCII);
111111
<%- if string_type == "String" -%>
112112
this.encodingCharset = getEncodingCharset(this.encodingName);
113113
<%- end -%>
114114

115-
source.setStartLine(loadVarInt());
115+
source.setStartLine(loadVarUInt());
116116

117117
ParseResult.MagicComment[] magicComments = loadMagicComments();
118118
Nodes.Location dataLocation = loadOptionalLocation();
119119
ParseResult.Error[] errors = loadSyntaxErrors();
120120
ParseResult.Warning[] warnings = loadWarnings();
121121

122122
int constantPoolBufferOffset = buffer.getInt();
123-
int constantPoolLength = loadVarInt();
123+
int constantPoolLength = loadVarUInt();
124124
this.constantPool = new ConstantPool(this, source.bytes, constantPoolBufferOffset, constantPoolLength);
125125

126126
Nodes.Node node = loadNode();
@@ -138,7 +138,7 @@ public class Loader {
138138
}
139139

140140
private byte[] loadEmbeddedString() {
141-
int length = loadVarInt();
141+
int length = loadVarUInt();
142142
byte[] bytes = new byte[length];
143143
buffer.get(bytes);
144144
return bytes;
@@ -147,8 +147,8 @@ public class Loader {
147147
private byte[] loadString() {
148148
switch (buffer.get()) {
149149
case 1:
150-
int start = loadVarInt();
151-
int length = loadVarInt();
150+
int start = loadVarUInt();
151+
int length = loadVarUInt();
152152
byte[] bytes = new byte[length];
153153
System.arraycopy(source.bytes, start, bytes, 0, length);
154154
return bytes;
@@ -160,7 +160,7 @@ public class Loader {
160160
}
161161

162162
private ParseResult.MagicComment[] loadMagicComments() {
163-
int count = loadVarInt();
163+
int count = loadVarUInt();
164164
ParseResult.MagicComment[] magicComments = new ParseResult.MagicComment[count];
165165

166166
for (int i = 0; i < count; i++) {
@@ -175,7 +175,7 @@ public class Loader {
175175
}
176176

177177
private ParseResult.Error[] loadSyntaxErrors() {
178-
int count = loadVarInt();
178+
int count = loadVarUInt();
179179
ParseResult.Error[] errors = new ParseResult.Error[count];
180180

181181
// error messages only contain ASCII characters
@@ -192,7 +192,7 @@ public class Loader {
192192
}
193193

194194
private ParseResult.Warning[] loadWarnings() {
195-
int count = loadVarInt();
195+
int count = loadVarUInt();
196196
ParseResult.Warning[] warnings = new ParseResult.Warning[count];
197197

198198
// warning messages only contain ASCII characters
@@ -218,7 +218,7 @@ public class Loader {
218218
}
219219

220220
private <%= string_type %> loadConstant() {
221-
return constantPool.get(buffer, loadVarInt());
221+
return constantPool.get(buffer, loadVarUInt());
222222
}
223223

224224
private <%= string_type %> loadOptionalConstant() {
@@ -231,19 +231,19 @@ public class Loader {
231231
}
232232

233233
private <%= string_type %>[] loadConstants() {
234-
int length = loadVarInt();
234+
int length = loadVarUInt();
235235
if (length == 0) {
236236
return Nodes.EMPTY_STRING_ARRAY;
237237
}
238238
<%= string_type %>[] constants = new <%= string_type %>[length];
239239
for (int i = 0; i < length; i++) {
240-
constants[i] = constantPool.get(buffer, loadVarInt());
240+
constants[i] = constantPool.get(buffer, loadVarUInt());
241241
}
242242
return constants;
243243
}
244244

245245
private Nodes.Node[] loadNodes() {
246-
int length = loadVarInt();
246+
int length = loadVarUInt();
247247
if (length == 0) {
248248
return Nodes.Node.EMPTY_ARRAY;
249249
}
@@ -255,7 +255,7 @@ public class Loader {
255255
}
256256

257257
private Nodes.Location loadLocation() {
258-
return new Nodes.Location(loadVarInt(), loadVarInt());
258+
return new Nodes.Location(loadVarUInt(), loadVarUInt());
259259
}
260260

261261
private Nodes.Location loadOptionalLocation() {
@@ -267,7 +267,7 @@ public class Loader {
267267
}
268268

269269
// From https://github.com/protocolbuffers/protobuf/blob/v23.1/java/core/src/main/java/com/google/protobuf/BinaryReader.java#L1507
270-
private int loadVarInt() {
270+
private int loadVarUInt() {
271271
int x;
272272
if ((x = buffer.get()) >= 0) {
273273
return x;
@@ -285,15 +285,15 @@ public class Loader {
285285
}
286286

287287
private short loadFlags() {
288-
int flags = loadVarInt();
288+
int flags = loadVarUInt();
289289
assert flags >= 0 && flags <= Short.MAX_VALUE;
290290
return (short) flags;
291291
}
292292

293293
private Nodes.Node loadNode() {
294294
int type = buffer.get() & 0xFF;
295-
int startOffset = loadVarInt();
296-
int length = loadVarInt();
295+
int startOffset = loadVarUInt();
296+
int length = loadVarUInt();
297297

298298
switch (type) {
299299
<%- nodes.each_with_index do |node, index| -%>
@@ -311,7 +311,7 @@ public class Loader {
311311
when Prism::ConstantListField then "loadConstants()"
312312
when Prism::LocationField then "loadLocation()"
313313
when Prism::OptionalLocationField then "loadOptionalLocation()"
314-
when Prism::UInt32Field then "loadVarInt()"
314+
when Prism::UInt32Field then "loadVarUInt()"
315315
when Prism::FlagsField then "loadFlags()"
316316
else raise
317317
end

templates/lib/prism/serialize.rb.erb

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -73,18 +73,18 @@ module Prism
7373
end
7474

7575
def load_encoding
76-
@encoding = Encoding.find(io.read(load_varint))
76+
@encoding = Encoding.find(io.read(load_varuint))
7777
@input = input.force_encoding(@encoding).freeze
7878
@encoding
7979
end
8080

8181
def load_start_line
82-
source.start_line = load_varint
82+
source.start_line = load_varuint
8383
end
8484

8585
def load_comments
86-
load_varint.times.map do
87-
case load_varint
86+
load_varuint.times.map do
87+
case load_varuint
8888
when 0 then InlineComment.new(load_location)
8989
when 1 then EmbDocComment.new(load_location)
9090
when 2 then DATAComment.new(load_location)
@@ -94,19 +94,19 @@ module Prism
9494

9595
def load_metadata
9696
comments = load_comments
97-
magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) }
97+
magic_comments = load_varuint.times.map { MagicComment.new(load_location, load_location) }
9898
data_loc = load_optional_location
99-
errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) }
100-
warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) }
99+
errors = load_varuint.times.map { ParseError.new(load_embedded_string, load_location) }
100+
warnings = load_varuint.times.map { ParseWarning.new(load_embedded_string, load_location) }
101101
[comments, magic_comments, data_loc, errors, warnings]
102102
end
103103

104104
def load_tokens
105105
tokens = []
106-
while type = TOKEN_TYPES.fetch(load_varint)
107-
start = load_varint
108-
length = load_varint
109-
lex_state = load_varint
106+
while type = TOKEN_TYPES.fetch(load_varuint)
107+
start = load_varuint
108+
length = load_varuint
109+
lex_state = load_varuint
110110
location = Location.new(@source, start, length)
111111
tokens << [Prism::Token.new(type, location.slice, location), lex_state]
112112
end
@@ -133,7 +133,7 @@ module Prism
133133
comments, magic_comments, data_loc, errors, warnings = load_metadata
134134

135135
@constant_pool_offset = io.read(4).unpack1("L")
136-
@constant_pool = Array.new(load_varint, nil)
136+
@constant_pool = Array.new(load_varuint, nil)
137137

138138
[load_node, comments, magic_comments, data_loc, errors, warnings]
139139
end
@@ -147,7 +147,7 @@ module Prism
147147

148148
# variable-length integer using https://en.wikipedia.org/wiki/LEB128
149149
# This is also what protobuf uses: https://protobuf.dev/programming-guides/encoding/#varints
150-
def load_varint
150+
def load_varuint
151151
n = io.getbyte
152152
if n < 128
153153
n
@@ -173,14 +173,14 @@ module Prism
173173
end
174174

175175
def load_embedded_string
176-
io.read(load_varint).force_encoding(encoding)
176+
io.read(load_varuint).force_encoding(encoding)
177177
end
178178

179179
def load_string
180180
type = io.getbyte
181181
case type
182182
when 1
183-
input.byteslice(load_varint, load_varint).force_encoding(encoding)
183+
input.byteslice(load_varuint, load_varuint).force_encoding(encoding)
184184
when 2
185185
load_embedded_string
186186
else
@@ -189,7 +189,7 @@ module Prism
189189
end
190190

191191
def load_location
192-
Location.new(source, load_varint, load_varint)
192+
Location.new(source, load_varuint, load_varuint)
193193
end
194194

195195
def load_optional_location
@@ -218,11 +218,11 @@ module Prism
218218
end
219219

220220
def load_required_constant
221-
load_constant(load_varint - 1)
221+
load_constant(load_varuint - 1)
222222
end
223223

224224
def load_optional_constant
225-
index = load_varint
225+
index = load_varuint
226226
load_constant(index - 1) if index != 0
227227
end
228228

@@ -242,13 +242,13 @@ module Prism
242242
when Prism::NodeField then "load_node"
243243
when Prism::OptionalNodeField then "load_optional_node"
244244
when Prism::StringField then "load_string"
245-
when Prism::NodeListField then "Array.new(load_varint) { load_node }"
245+
when Prism::NodeListField then "Array.new(load_varuint) { load_node }"
246246
when Prism::ConstantField then "load_required_constant"
247247
when Prism::OptionalConstantField then "load_optional_constant"
248-
when Prism::ConstantListField then "Array.new(load_varint) { load_required_constant }"
248+
when Prism::ConstantListField then "Array.new(load_varuint) { load_required_constant }"
249249
when Prism::LocationField then "load_location"
250250
when Prism::OptionalLocationField then "load_optional_location"
251-
when Prism::UInt32Field, Prism::FlagsField then "load_varint"
251+
when Prism::UInt32Field, Prism::FlagsField then "load_varuint"
252252
else raise
253253
end
254254
} + ["location"]).join(", ") -%>)
@@ -275,13 +275,13 @@ module Prism
275275
when Prism::NodeField then "load_node"
276276
when Prism::OptionalNodeField then "load_optional_node"
277277
when Prism::StringField then "load_string"
278-
when Prism::NodeListField then "Array.new(load_varint) { load_node }"
278+
when Prism::NodeListField then "Array.new(load_varuint) { load_node }"
279279
when Prism::ConstantField then "load_required_constant"
280280
when Prism::OptionalConstantField then "load_optional_constant"
281-
when Prism::ConstantListField then "Array.new(load_varint) { load_required_constant }"
281+
when Prism::ConstantListField then "Array.new(load_varuint) { load_required_constant }"
282282
when Prism::LocationField then "load_location"
283283
when Prism::OptionalLocationField then "load_optional_location"
284-
when Prism::UInt32Field, Prism::FlagsField then "load_varint"
284+
when Prism::UInt32Field, Prism::FlagsField then "load_varuint"
285285
else raise
286286
end
287287
} + ["location"]).join(", ") -%>)

0 commit comments

Comments
 (0)