Skip to content

Commit

Permalink
GH-88116: Use a compact format to represent end line and column offse…
Browse files Browse the repository at this point in the history
…ts. (GH-91666)

* Stores all location info in linetable to conform to PEP 626.

* Remove column table from code objects.

* Remove end-line table from code objects.

* Document new location table format
  • Loading branch information
markshannon committed Apr 21, 2022
1 parent 2a5f171 commit 944fffe
Show file tree
Hide file tree
Showing 20 changed files with 853 additions and 533 deletions.
32 changes: 19 additions & 13 deletions Include/cpython/code.h
Expand Up @@ -86,15 +86,7 @@ typedef uint16_t _Py_CODEUNIT;
PyObject *co_filename; /* unicode (where it was loaded from) */ \
PyObject *co_name; /* unicode (name, for reference) */ \
PyObject *co_qualname; /* unicode (qualname, for reference) */ \
PyObject *co_linetable; /* bytes (encoding addr<->lineno mapping) \
See Objects/lnotab_notes.txt for details. \
*/ \
PyObject *co_endlinetable; /* bytes object that holds end lineno for \
instructions separated across different \
lines */ \
PyObject *co_columntable; /* bytes object that holds start/end column \
offset each instruction */ \
\
PyObject *co_linetable; /* bytes object that holds location info */ \
PyObject *co_weakreflist; /* to support weakrefs to code objects */ \
/* Scratch space for extra data relating to the code object. \
Type is a void* to keep the format private in codeobject.c to force \
Expand Down Expand Up @@ -153,13 +145,13 @@ PyAPI_FUNC(PyCodeObject *) PyCode_New(
int, int, int, int, int, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, int, PyObject *,
PyObject *, PyObject *, PyObject *);
PyObject *);

PyAPI_FUNC(PyCodeObject *) PyCode_NewWithPosOnlyArgs(
int, int, int, int, int, int, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, int, PyObject *,
PyObject *, PyObject *, PyObject *);
PyObject *);
/* same as struct above */

/* Creates a new empty code object with the specified source location. */
Expand All @@ -176,8 +168,8 @@ PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, i
/* for internal use only */
/* NOTE(review): lo_next and limit are each declared twice below, once as
 * const char * and once as const uint8_t *.  A struct cannot have two
 * members with the same name, so this looks like the removed and the added
 * lines of a diff shown together.  Presumably only the uint8_t pair is
 * current -- confirm against the real header. */
struct _opaque {
int computed_line;
/* presumably a cursor into, and the end of, the table being read -- TODO confirm */
const char *lo_next;
const char *limit;
const uint8_t *lo_next;
const uint8_t *limit;
};

typedef struct _line_offsets {
Expand Down Expand Up @@ -210,6 +202,20 @@ PyAPI_FUNC(int) _PyCode_GetExtra(PyObject *code, Py_ssize_t index,
PyAPI_FUNC(int) _PyCode_SetExtra(PyObject *code, Py_ssize_t index,
void *extra);


/* Kind of a location table entry.  A kind must fit in four bits: it is
 * stored in bits 3-6 of the first byte of an entry, next to the entry's
 * length minus one in bits 0-2. */
typedef enum _PyCodeLocationInfoKind {
/* Short forms are 0 to 9.  The line does not change and one byte follows:
 * its high nibble is combined with (kind << 3) to give the start column,
 * and its low nibble is the end column minus the start column. */
PY_CODE_LOCATION_INFO_SHORT0 = 0,
/* One-line forms are 10 to 12.  The line advances by (kind - 10) and two
 * bytes follow: the start column and the end column. */
PY_CODE_LOCATION_INFO_ONE_LINE0 = 10,
PY_CODE_LOCATION_INFO_ONE_LINE1 = 11,
PY_CODE_LOCATION_INFO_ONE_LINE2 = 12,

/* A signed varint line delta follows; there is no column information. */
PY_CODE_LOCATION_INFO_NO_COLUMNS = 13,
/* Followed by a signed varint line delta, then three varints: the end line
 * minus the start line, the start column plus one and the end column plus
 * one (zero means the column is unknown). */
PY_CODE_LOCATION_INFO_LONG = 14,
/* No location information; nothing follows the first byte. */
PY_CODE_LOCATION_INFO_NONE = 15
} _PyCodeLocationInfoKind;

#ifdef __cplusplus
}
#endif
Expand Down
49 changes: 35 additions & 14 deletions Include/internal/pycore_code.h
Expand Up @@ -176,8 +176,6 @@ struct _PyCodeConstructor {
PyObject *code;
int firstlineno;
PyObject *linetable;
PyObject *endlinetable;
PyObject *columntable;

/* used by the code */
PyObject *consts;
Expand Down Expand Up @@ -221,21 +219,10 @@ extern PyObject* _PyCode_GetCellvars(PyCodeObject *);
extern PyObject* _PyCode_GetFreevars(PyCodeObject *);
extern PyObject* _PyCode_GetCode(PyCodeObject *);

/* Return the ending source code line number from a bytecode index. */
extern int _PyCode_Addr2EndLine(PyCodeObject *, int);

/* Return the ending source code line number from a bytecode index. */
extern int _PyCode_Addr2EndLine(PyCodeObject *, int);
/* Return the starting source code column offset from a bytecode index. */
extern int _PyCode_Addr2Offset(PyCodeObject *, int);
/* Return the ending source code column offset from a bytecode index. */
extern int _PyCode_Addr2EndOffset(PyCodeObject *, int);

/** API for initializing the line number tables. */
extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds);
extern int _PyCode_InitEndAddressRange(PyCodeObject* co, PyCodeAddressRange* bounds);

/** Out of process API for initializing the line number table. */
/** Out of process API for initializing the location table. */
extern void _PyLineTable_InitAddressRange(
const char *linetable,
Py_ssize_t length,
Expand Down Expand Up @@ -445,6 +432,40 @@ read_obj(uint16_t *p)
return (PyObject *)val;
}

/* Write `val` to `ptr` as a varint and return the number of bytes written.
 *
 * The value is emitted six bits at a time, least significant group first.
 * Every byte except the last has bit 6 (value 64) set to signal that
 * another byte follows.  The caller must provide enough room at `ptr`.
 */
static inline int
write_varint(uint8_t *ptr, unsigned int val)
{
    uint8_t *out = ptr;
    for (; val >= 64; val >>= 6) {
        /* Low six bits plus the continuation flag. */
        *out++ = 64 | (val & 63);
    }
    /* Final group: fewer than 64, so the continuation flag stays clear. */
    *out++ = val;
    return (int)(out - ptr);
}

/* Write `val` to `ptr` as a signed varint and return the number of bytes
 * written.
 *
 * The value is folded into an unsigned integer whose bit 0 is the sign
 * (1 for negative values) and whose remaining bits hold the magnitude; the
 * result is then emitted with write_varint().
 *
 * The folding is done in unsigned arithmetic on purpose.  Negating or
 * left-shifting a signed int overflows for INT_MIN and for large
 * magnitudes, which is undefined behaviour, whereas unsigned arithmetic
 * simply wraps.  A magnitude that does not fit in the remaining bits of an
 * unsigned int (only INT_MIN, where int and unsigned int have the same
 * width) loses its top bit.
 */
static inline int
write_signed_varint(uint8_t *ptr, int val)
{
    unsigned int uval;
    if (val < 0) {
        /* 0 - (unsigned)val is the magnitude of val, even for INT_MIN. */
        uval = ((0u - (unsigned int)val) << 1) | 1u;
    }
    else {
        uval = (unsigned int)val << 1;
    }
    return write_varint(ptr, uval);
}

/* Write the first byte of a location table entry to `ptr`.
 *
 * Layout of the byte: bit 7 is always set, bits 3-6 hold `code` (which must
 * fit in four bits) and bits 0-2 hold `length - 1`.
 *
 * Returns 1, the number of bytes written.
 */
static inline int
write_location_entry_start(uint8_t *ptr, int code, int length)
{
    assert((code & 15) == code);
    const int kind_bits = code << 3;
    const int length_bits = length - 1;
    *ptr = 128 | kind_bits | length_bits;
    return 1;
}


#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion Lib/importlib/_bootstrap_external.py
Expand Up @@ -402,7 +402,7 @@ def _write_atomic(path, data, mode=0o666):
# add JUMP_BACKWARD_NO_INTERRUPT, make JUMP_NO_INTERRUPT virtual)
# Python 3.11a7 3492 (make POP_JUMP_IF_NONE/NOT_NONE/TRUE/FALSE relative)
# Python 3.11a7 3493 (Make JUMP_IF_TRUE_OR_POP/JUMP_IF_FALSE_OR_POP relative)

# Python 3.11a7 3494 (New location info table)
# Python 3.12 will start with magic number 3500


Expand Down
169 changes: 130 additions & 39 deletions Lib/test/test_code.py
Expand Up @@ -230,9 +230,7 @@ def func(): pass
co.co_name,
co.co_qualname,
co.co_firstlineno,
co.co_lnotab,
co.co_endlinetable,
co.co_columntable,
co.co_linetable,
co.co_exceptiontable,
co.co_freevars,
co.co_cellvars)
Expand Down Expand Up @@ -273,8 +271,6 @@ def func2():
("co_filename", "newfilename"),
("co_name", "newname"),
("co_linetable", code2.co_linetable),
("co_endlinetable", code2.co_endlinetable),
("co_columntable", code2.co_columntable),
):
with self.subTest(attr=attr, value=value):
new_code = code.replace(**{attr: value})
Expand Down Expand Up @@ -311,9 +307,7 @@ def func():
co.co_name,
co.co_qualname,
co.co_firstlineno,
co.co_lnotab,
co.co_endlinetable,
co.co_columntable,
co.co_linetable,
co.co_exceptiontable,
co.co_freevars,
co.co_cellvars,
Expand Down Expand Up @@ -391,14 +385,17 @@ def test_co_positions_artificial_instructions(self):
)

def test_endline_and_columntable_none_when_no_debug_ranges(self):
# Make sure that if `-X no_debug_ranges` is used, the endlinetable and
# columntable are None.
# Make sure that if `-X no_debug_ranges` is used, there is
# minimal debug info
code = textwrap.dedent("""
def f():
pass
assert f.__code__.co_endlinetable is None
assert f.__code__.co_columntable is None
positions = f.__code__.co_positions()
for line, end_line, column, end_column in positions:
assert line == end_line
assert column is None
assert end_column is None
""")
assert_python_ok('-X', 'no_debug_ranges', '-c', code)

Expand All @@ -408,8 +405,11 @@ def test_endline_and_columntable_none_when_no_debug_ranges_env(self):
def f():
pass
assert f.__code__.co_endlinetable is None
assert f.__code__.co_columntable is None
positions = f.__code__.co_positions()
for line, end_line, column, end_column in positions:
assert line == end_line
assert column is None
assert end_column is None
""")
assert_python_ok('-c', code, PYTHONNODEBUGRANGES='1')

Expand All @@ -421,35 +421,10 @@ def func():
x = 1
new_code = func.__code__.replace(co_linetable=b'')
positions = new_code.co_positions()
next(positions) # Skip RESUME at start
for line, end_line, column, end_column in positions:
self.assertIsNone(line)
self.assertEqual(end_line, new_code.co_firstlineno + 1)

@requires_debug_ranges()
def test_co_positions_empty_endlinetable(self):
def func():
x = 1
new_code = func.__code__.replace(co_endlinetable=b'')
positions = new_code.co_positions()
next(positions) # Skip RESUME at start
for line, end_line, column, end_column in positions:
self.assertEqual(line, new_code.co_firstlineno + 1)
self.assertIsNone(end_line)

@requires_debug_ranges()
def test_co_positions_empty_columntable(self):
def func():
x = 1
new_code = func.__code__.replace(co_columntable=b'')
positions = new_code.co_positions()
next(positions) # Skip RESUME at start
for line, end_line, column, end_column in positions:
self.assertEqual(line, new_code.co_firstlineno + 1)
self.assertEqual(end_line, new_code.co_firstlineno + 1)
self.assertIsNone(column)
self.assertIsNone(end_column)


def isinterned(s):
return s is sys.intern(('_' + s + '_')[1:-1])
Expand Down Expand Up @@ -527,6 +502,122 @@ def callback(code):
self.assertFalse(bool(coderef()))
self.assertTrue(self.called)

# Python implementation of location table parsing algorithm
def read(it):
    """Return the next item of iterator *it*.

    Raises StopIteration when *it* is exhausted.
    """
    item = next(it)
    return item

def read_varint(it):
    """Read one unsigned varint from the byte iterator *it*.

    Each byte carries six payload bits, least significant group first;
    bit 6 (value 64) is set on every byte that is followed by another one.
    """
    byte = next(it)
    value = byte & 63
    position = 0
    while byte & 64:
        byte = next(it)
        position += 6
        value |= (byte & 63) << position
    return value

def read_signed_varint(it):
    """Read one signed varint from the byte iterator *it*.

    The underlying unsigned varint holds the sign in bit 0 (1 means
    negative) and the magnitude in the remaining bits.
    """
    uval = read_varint(it)
    magnitude = uval >> 1
    return -magnitude if uval & 1 else magnitude

def parse_location_table(code):
    """Decode ``code.co_linetable`` one entry at a time.

    Yields ``(kind, length, line, end_line, column, end_column)`` tuples.
    *kind* is taken from bits 3-6 of the entry's first byte and *length*
    is the value of bits 0-2 plus one.  The four position fields are None
    where the entry carries no such information.  Line numbers are tracked
    cumulatively, starting from ``code.co_firstlineno``.
    """
    line = code.co_firstlineno
    it = iter(code.co_linetable)
    # The helpers below pull the rest of each entry from the same iterator,
    # so each pass of this loop starts on the first byte of an entry.
    for first_byte in it:
        kind = (first_byte >> 3) & 15
        length = (first_byte & 7) + 1
        if kind == 15:
            # No location at all.
            yield (kind, length, None, None, None, None)
        elif kind == 14:
            # Long form: line delta, end line delta, column + 1, end column + 1.
            line += read_signed_varint(it)
            end_line = line + read_varint(it)
            raw_col = read_varint(it)
            raw_end_col = read_varint(it)
            # Zero stands for "no column"; other values are offset by one.
            col = None if raw_col == 0 else raw_col - 1
            end_col = None if raw_end_col == 0 else raw_end_col - 1
            yield (kind, length, line, end_line, col, end_col)
        elif kind == 13:
            # Line delta only, no column information.
            line += read_signed_varint(it)
            yield (kind, length, line, line, None, None)
        elif kind in (10, 11, 12):
            # One-line form: the kind itself encodes a line delta of 0, 1 or 2.
            line += kind - 10
            column = read(it)
            end_column = read(it)
            yield (kind, length, line, line, column, end_column)
        else:
            # Short form: same line, columns packed into one extra byte.
            assert (0 <= kind < 10)
            second_byte = read(it)
            column = kind << 3 | (second_byte >> 4)
            width = second_byte & 15
            yield (kind, length, line, line, column, column + width)

def positions_from_location_table(code):
    """Yield one ``(line, end_line, column, end_column)`` tuple per repetition.

    Every entry produced by parse_location_table() is repeated ``length``
    times, dropping the entry kind and the length themselves.
    """
    for entry in parse_location_table(code):
        length = entry[1]
        line, end_line, col, end_col = entry[2:]
        remaining = length
        while remaining > 0:
            yield (line, end_line, col, end_col)
            remaining -= 1

# Fixture for CodeLocationTest: only the __code__ of this function is
# inspected, it is never called, so the unbound names in it do not matter.
# The expressions are deliberately spread over many lines, presumably to
# produce positions whose end line differs from the start line.  The layout
# is the test data; do not reformat it.
def misshappen():
"""
"""
x = (


4

+

y

)
y = (
a
+
b
+

d
)
return q if (

x

) else p


class CodeLocationTest(unittest.TestCase):
    """Compare co_positions() with a pure-Python decoding of co_linetable."""

    def check_positions(self, func):
        """Assert that both decoders give the same positions for *func*."""
        reported = list(func.__code__.co_positions())
        decoded = list(positions_from_location_table(func.__code__))
        # Compare element by element first so that a mismatch is reported
        # on the offending position rather than only as a length difference.
        for from_code, from_table in zip(reported, decoded):
            self.assertEqual(from_code, from_table)
        self.assertEqual(len(reported), len(decoded))

    def test_positions(self):
        for func in (parse_location_table, misshappen):
            self.check_positions(func)


if check_impl_detail(cpython=True) and ctypes is not None:
py = ctypes.pythonapi
Expand Down

0 comments on commit 944fffe

Please sign in to comment.