Permalink
Browse files

Introduce the concept of asdl.const.NO_INTEGER.

It is the default value for unspecified ASDL integers.  See the comment
in const.py for the rationale.

Change span_id and line_id to be 0-based, with const.NO_INTEGER as the
uninitialized/unknown value.

All unit tests and spec tests pass.

Also:

benchmarks/oheap.sh: Try to encode everything in benchmarks/testdata.

This revealed that we should properly encode Array<Str>, which is now
done.  However there are still some lingering negative numbers.
  • Loading branch information...
Andy Chu
Andy Chu committed Nov 26, 2017
1 parent 86b8f78 commit 10c0897d96d125041773e149fb92ee10ee48294d
View
@@ -6,11 +6,13 @@
import io
import unittest
from asdl import arith_ast # module under test
from asdl import format as fmt
from asdl import py_meta
from asdl import encode
from asdl import asdl_
from asdl import const
from asdl import encode
from asdl import arith_ast # module under test
ArithVar = arith_ast.ArithVar
@@ -44,7 +46,7 @@ def testFieldDefaults(self):
t = arith_ast.token(5, 'x')
self.assertEqual(5, t.id)
self.assertEqual('x', t.value)
self.assertEqual(0, t.span_id)
self.assertEqual(const.NO_INTEGER, t.span_id)
def testTypeCheck(self):
v = ArithVar('name')
View
@@ -0,0 +1,36 @@
#!/usr/bin/python
"""
const.py
"""
DEFAULT_INT_WIDTH = 3 # 24 bits
# 2^24 - 1 is used as an invalid/uninitialized value for ASDL integers.
# Why? We have a few use cases for invalid/sentinel values:
# - span_id, line_id. Sometimes we don't have a span ID.
# - file descriptor: 'read x < f.txt' vs 'read x 0< f.txt'
#
# Other options for representation:
#
# 1. ASDL could use signed integers, so that -1 is a valid value.
# 2. Use a type like fd = None | Some(int fd)
#
# I don't like #1 because ASDL is lazily-decoded, and then we have to do sign
# extension on demand. (24 bits to 32 or 64). As far as I can tell, sign
# extension requires a branch, at least in portable C (on the sign bit).
#
# The second option is semantically cleaner. But it needlessly
# inflates the size of both the source code and the data. Instead of having a
# single "inline" integer, we would need a reference to another value.
#
# We could also try to do some fancy thing like fd = None |
# Range<1..max_fd>(fd), with smart encoding. But that is overkill for these
# use cases.
#
# Using NO_INTEGER instead of -1 seems like a good compromise.
NO_INTEGER = (1 << (DEFAULT_INT_WIDTH * 8)) - 1
# NOTE: In Python, 1 << (n * 8) - 1 is wrong! It parses as 1 << ((n * 8) - 1),
# because bit shifts have lower precedence than subtraction.
View
@@ -6,6 +6,7 @@
from asdl import asdl_ as asdl
from asdl import py_meta
from asdl import const
_DEFAULT_ALIGNMENT = 4
@@ -52,19 +53,18 @@ class Params:
with 24 bit pointers.
"""
def __init__(self, alignment=_DEFAULT_ALIGNMENT):
def __init__(self, alignment=_DEFAULT_ALIGNMENT,
int_width=const.DEFAULT_INT_WIDTH):
self.alignment = alignment
self.pointer_type = 'uint32_t'
self.tag_width = 1 # for ArithVar vs ArithWord.
self.ref_width = 3 # 24 bits
self.int_width = 3 # 24 bits
self.int_width = int_width
# used for fd, line/col
# also I guess stuff like SimpleCommand
self.index_width = 2 # 16 bits, e.g. max 64K entries in an array
# TODO: check for negative too
self.max_int = 1 << (self.ref_width * 8)
self.max_int = 1 << (self.int_width * 8)
self.max_index = 1 << (self.index_width * 8)
self.max_tag = 1 << (self.tag_width * 8)
@@ -75,15 +75,17 @@ def Tag(self, i, chunk):
def Int(self, n, chunk):
  """Append n to chunk as an unsigned little-endian integer.

  Exactly self.int_width bytes are appended, least significant byte first.

  Args:
    n: Non-negative integer to encode.  Must be less than self.max_int.
    chunk: Mutable byte sequence (e.g. a bytearray) that receives the bytes.

  Raises:
    RuntimeError: If n is negative or does not fit in self.int_width bytes.
  """
  if n < 0:
    raise RuntimeError(
        "ASDL can't currently encode negative numbers. Got %d" % n)
  # max_int is one past the largest encodable value, so the bound has to be
  # inclusive; otherwise n == max_int would be silently truncated to zero.
  if n >= self.max_int:
    raise RuntimeError('%d is too big to fit in %d bytes' % (n, self.int_width))
  for _ in range(self.int_width):
    chunk.append(n & 0xFF)
    n >>= 8
def Ref(self, n, chunk):
# NOTE: ref width is currently the same as int width. Could be different.
self.Int(n, chunk)
def _Pad(self, chunk):
@@ -147,12 +149,16 @@ def EncodeArray(obj_list, item_desc, enc, out):
for item in obj_list:
enc.Int(item, array_chunk)
elif isinstance(item_desc, asdl.StrType):
for item in obj_list:
ref = out.Write(enc.PaddedStr(item))
enc.Ref(ref, array_chunk)
elif isinstance(item_desc, asdl.Sum) and asdl.is_simple(item_desc):
for item in obj_list:
enc.Int(item.enum_id, array_chunk)
else:
# A simple value is either an int, enum, or pointer. (Later: Iter<Str>
# might be possible for locality.)
assert isinstance(item_desc, asdl.Sum) or isinstance(
@@ -165,7 +171,12 @@ def EncodeArray(obj_list, item_desc, enc, out):
# Array<T>. Array implies O(1) random access; List doesn't.
for item in obj_list:
# Recursive call.
ref = EncodeObj(item, enc, out)
from core import util
try:
ref = EncodeObj(item, enc, out)
except RuntimeError as e:
util.log("Error encoding array: %s (item %s)", e, item)
raise
enc.Ref(ref, array_chunk)
this_ref = out.Write(enc.PaddedBlock(array_chunk))
@@ -232,16 +243,24 @@ def EncodeObj(obj, enc, out):
enc.Ref(ref, this_chunk)
elif isinstance(desc, asdl.UserType):
# Assume Id for now
enc.Int(field_val.enum_value, this_chunk)
if is_maybe and field_val is None: # e.g. id? prefix_op
enc.Ref(0, this_chunk)
else:
# Assume Id for now
enc.Int(field_val.enum_value, this_chunk)
else:
if is_maybe and field_val is None:
enc.Ref(0, this_chunk)
else:
# Recursive call for CompoundObj children. Write children before
# parents.
ref = EncodeObj(field_val, enc, out)
from core import util
try:
ref = EncodeObj(field_val, enc, out)
except RuntimeError as e:
util.log("Error encoding %s : %s (val %s)", name, e, field_val)
raise
enc.Ref(ref, this_chunk)
# Write the parent record
View
@@ -6,6 +6,7 @@
import unittest
from asdl import encode # module under test
from asdl import const
class EncoderTest(unittest.TestCase):
@@ -17,6 +18,10 @@ def testEncoder(self):
p.Int(1, chunk)
self.assertEqual(b'\x01\x00\x00', chunk)
chunk = bytearray()
p.Int(const.NO_INTEGER, chunk)
self.assertEqual(b'\xff\xff\xff', chunk)
chunk = p.PaddedBytes('0123456789')
# 2 byte length -- max 64K entries
self.assertEqual(b'\x0A\x000123456789\x00\x00\x00\x00', bytes(chunk))
View
@@ -76,6 +76,15 @@ void PrintCommand(const uint32_t* base, const command_t& e, int indent) {
}
break;
}
case command_e::Assignment: {
auto& e2 = static_cast<const Assignment&>(e);
printf("Assignment flags: ");
for (int i = 0; i < e2.flags_size(base); ++i) {
printf("%s ", e2.flags(base, i));
}
printf("\n");
break;
}
case command_e::CommandList: {
auto& e2 = static_cast<const CommandList&>(e);
printf("CommandList %d\n", e2.children_size(base));
View
@@ -29,8 +29,9 @@
import io
import sys
from asdl import format as fmt
from asdl import asdl_ as asdl
from asdl import const
from asdl import format as fmt
from core import util
@@ -166,7 +167,7 @@ def _SetDefaults(self):
if isinstance(desc, asdl.MaybeType):
child = desc.desc
if isinstance(child, asdl.IntType):
value = 0
value = const.NO_INTEGER
elif isinstance(child, asdl.StrType):
value = ''
else:
View
@@ -116,6 +116,7 @@ osh-demo() {
local data=_tmp/${name}.bin
local code='echo hi; echo bye # comment'
local code='declare -r -x foo' # for testing repeated array
#local code='echo $(( 2 + 3 ))'
#local code='echo $(( -2 * -3 ))' # test negative integers
bin/osh -n --ast-format oheap -c "$code" > $data
View
@@ -0,0 +1,41 @@
#!/bin/bash
#
# Test the size of file, encoding, and decoding speed.
#
# Usage:
# ./oheap.sh <function name>
set -o nounset
set -o pipefail
set -o errexit
# Parse one shell script with osh (-n: parse only) and write its AST in oheap
# format.
#
# Args:
#   $1: path of the shell script to parse
#   $2: path of the oheap file to write
encode-one() {
  local script=$1
  local oheap_out=$2
  # Quote the redirect target: an unquoted path containing whitespace makes
  # bash fail with "ambiguous redirect".
  bin/osh -n --ast-format oheap "$script" > "$oheap_out"
}
# Print one "<script path> <oheap output path>" pair per line, for every path
# listed in benchmarks/osh-parser-files.txt.  The output file is named after
# the script's basename and placed under _tmp/oheap.
task-spec() {
  local path  # keep the loop variable out of the global scope
  # -r: don't let 'read' interpret backslashes in the path.
  while read -r path; do
    echo "$path _tmp/oheap/$(basename "$path").oheap"
  done < benchmarks/osh-parser-files.txt
}
# Encode every script listed by task-spec, running each encode-one call under
# benchmarks/time.py.
#
# The CSV header is written to _tmp/oheap/results.csv first; time.py receives
# the same file as --output (presumably appending one row per run -- confirm
# against benchmarks/time.py).
run() {
  mkdir -p _tmp/oheap

  local results=_tmp/oheap/results.csv
  echo 'status,elapsed_secs' > "$results"

  # xargs passes two arguments (script path, oheap path) per invocation.
  # "$0" is quoted so this still works if the script path contains spaces.
  task-spec | xargs -n 2 --verbose -- \
    benchmarks/time.py --output "$results" -- \
    "$0" encode-one
}
# Show the sizes of the files under _tmp/oheap, then a blank line, then the
# contents of the results CSV.
stats() {
  local out_dir=_tmp/oheap

  ls -l -h "$out_dir"
  echo
  cat "$out_dir/results.csv"
}
"$@"
View
@@ -144,9 +144,6 @@ run() {
echo "$sh_path ID: $shell_hash"
# TODO: Shell ID should be separate columns?
# It's really shell_version_id?
if ! test -n "$preview"; then
# 20ms for ltmain.sh; 34ms for configure
cat $sorted | xargs -n 1 -- $0 \
View
@@ -11,6 +11,9 @@
Also, we don't want to save comment lines.
"""
from asdl import const
class Arena(object):
"""A collection of lines and line spans.
@@ -33,8 +36,8 @@ def __init__(self, arena_id):
self.next_line_id = 0
# Span IDs are 0-based. const.NO_INTEGER means undefined.
self.spans = [None]
self.next_span_id = 1
self.spans = []
self.next_span_id = 0
# List of (src_path index, physical line number). This is two integers for
# every line read. We could use a clever encoding of this. (Although the
@@ -95,12 +98,12 @@ def AddLineSpan(self, line_span):
return span_id
def GetLineSpan(self, span_id):
  """Return the line span stored under span_id.

  Span IDs are 0-based indexes into self.spans.  const.NO_INTEGER is the
  sentinel for an unknown/uninitialized span and is rejected here.
  """
  assert span_id != const.NO_INTEGER, span_id
  # 0 is the first valid span ID, so only negative values are rejected.
  assert span_id >= 0, span_id
  return self.spans[span_id]
def GetDebugInfo(self, line_id):
"""Get the path and physical line number, for parse errors."""
assert line_id >= 0
assert line_id != const.NO_INTEGER, line_id
src_id , line_num = self.debug_info[line_id]
try:
path = self.src_paths[src_id]
View
@@ -24,7 +24,7 @@ def testPool(self):
self.assertEqual(1, line_id)
span_id = arena.AddLineSpan(None)
self.assertEqual(1, span_id)
self.assertEqual(0, span_id)
arena.PopSource()
View
@@ -21,6 +21,8 @@
import sys
import time
from asdl import const
from core import args
from core import braces
from core import expr_eval
@@ -360,7 +362,7 @@ def _EvalLhs(self, node):
raise AssertionError(node.tag)
def _EvalRedirect(self, n):
fd = REDIR_DEFAULT_FD[n.op_id] if n.fd == -1 else n.fd
fd = REDIR_DEFAULT_FD[n.op_id] if n.fd == const.NO_INTEGER else n.fd
if n.tag == redir_e.Redir:
redir_type = REDIR_TYPE[n.op_id] # could be static in the LST?
Oops, something went wrong.

0 comments on commit 10c0897

Please sign in to comment.