From d139aa41fb3a4a9b049d3236857e87ab10761a1b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 1 Aug 2024 22:19:44 +0300 Subject: [PATCH 1/3] gh-122595: Add more error checks in the compiler --- Python/compile.c | 91 ++++++++++++++++++++++++++++++--------- Python/symtable.c | 106 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 153 insertions(+), 44 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 02b5345cedd0a3..9a782eb2372aa5 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -505,21 +505,36 @@ dictbytype(PyObject *src, int scope_type, int flag, Py_ssize_t offset) deterministic, then the generated bytecode is not deterministic. */ sorted_keys = PyDict_Keys(src); - if (sorted_keys == NULL) + if (sorted_keys == NULL) { + Py_DECREF(dest); return NULL; + } if (PyList_Sort(sorted_keys) != 0) { Py_DECREF(sorted_keys); + Py_DECREF(dest); return NULL; } num_keys = PyList_GET_SIZE(sorted_keys); for (key_i = 0; key_i < num_keys; key_i++) { - /* XXX this should probably be a macro in symtable.h */ long vi; k = PyList_GET_ITEM(sorted_keys, key_i); v = PyDict_GetItemWithError(src, k); - assert(v && PyLong_Check(v)); - vi = PyLong_AS_LONG(v); + if (!v) { + if (!PyErr_Occurred()) { + PyErr_SetObject(PyExc_KeyError, k); + } + Py_DECREF(sorted_keys); + Py_DECREF(dest); + return NULL; + } + vi = PyLong_AsLong(v); + if (vi == -1 && PyErr_Occurred()) { + Py_DECREF(sorted_keys); + Py_DECREF(dest); + return NULL; + } + /* XXX this should probably be a macro in symtable.h */ scope = (vi >> SCOPE_OFFSET) & SCOPE_MASK; if (scope == scope_type || vi & flag) { @@ -623,6 +638,9 @@ compiler_set_qualname(struct compiler *c) scope = _PyST_GetScope(parent->u_ste, mangled); Py_DECREF(mangled); + if (scope < 0) { + return ERROR; + } assert(scope != GLOBAL_IMPLICIT); if (scope == GLOBAL_EXPLICIT) force_global = 1; @@ -1640,7 +1658,7 @@ dict_lookup_arg(PyObject *dict, PyObject *name) if (v == NULL) { return ERROR; } - return PyLong_AS_LONG(v); + return PyLong_AsLong(v); } static int @@ -1663,7 +1681,7 @@ compiler_lookup_arg(struct compiler *c, PyCodeObject *co, PyObject *name) else { arg = dict_lookup_arg(c->u->u_metadata.u_freevars, name); } - if (arg == -1) { + if (arg == -1 && !PyErr_Occurred()) { PyObject *freevars = _PyCode_GetFreevars(co); if (freevars == NULL) { PyErr_Clear(); @@ -4077,6 +4095,8 @@ compiler_nameop(struct compiler *c, location loc, case GLOBAL_EXPLICIT: optype = OP_GLOBAL; break; + case -1: + goto error; default: /* scope can be 0 */ break; @@ -4630,6 +4650,9 @@ is_import_originated(struct compiler *c, expr_ty e) } long flags = _PyST_GetSymbol(SYMTABLE(c)->st_top, e->v.Name.id); + if (flags < 0) { + return ERROR; + } return flags & DEF_IMPORT; } @@ -4649,10 +4672,16 @@ can_optimize_super_call(struct compiler *c, expr_ty attr) PyObject *super_name = e->v.Call.func->v.Name.id; // detect statically-visible shadowing of 'super' name int scope = _PyST_GetScope(SYMTABLE_ENTRY(c), super_name); + if (scope < 0) { + return -1; + } if (scope != GLOBAL_IMPLICIT) { return 0; } scope = _PyST_GetScope(SYMTABLE(c)->st_top, super_name); + if (scope < 0) { + return -1; + } if (scope != 0) { return 0; } @@ -4759,7 +4788,11 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) } /* Check that the base object is not something that is imported */ - if (is_import_originated(c, meth->v.Attribute.value)) { + int ret = is_import_originated(c, meth->v.Attribute.value); + if (ret < 0) { + return ERROR; + } + if (ret) { return 0; } @@ -4787,7 +4820,11 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) /* Alright, we can optimize the code. */ location loc = LOC(meth); - if (can_optimize_super_call(c, meth)) { + ret = can_optimize_super_call(c, meth); + if (ret < 0) { + return ERROR; + } + if (ret) { RETURN_IF_ERROR(load_args_for_super(c, meth->v.Attribute.value)); int opcode = asdl_seq_LEN(meth->v.Attribute.value->v.Call.args) ? LOAD_SUPER_METHOD : LOAD_ZERO_SUPER_METHOD; @@ -5359,8 +5396,10 @@ push_inlined_comprehension_state(struct compiler *c, location loc, PyObject *k, *v; Py_ssize_t pos = 0; while (PyDict_Next(entry->ste_symbols, &pos, &k, &v)) { - assert(PyLong_Check(v)); - long symbol = PyLong_AS_LONG(v); + long symbol = PyLong_AsLong(v); + if (symbol == -1 && PyErr_Occurred()) { + return ERROR; + } long scope = (symbol >> SCOPE_OFFSET) & SCOPE_MASK; PyObject *outv = PyDict_GetItemWithError(SYMTABLE_ENTRY(c)->ste_symbols, k); if (outv == NULL) { @@ -5369,8 +5408,11 @@ push_inlined_comprehension_state(struct compiler *c, location loc, } outv = _PyLong_GetZero(); } - assert(PyLong_CheckExact(outv)); - long outsc = (PyLong_AS_LONG(outv) >> SCOPE_OFFSET) & SCOPE_MASK; + long outsymbol = PyLong_AsLong(outv); + if (outsymbol == -1 && PyErr_Occurred()) { + return ERROR; + } + long outsc = (outsymbol >> SCOPE_OFFSET) & SCOPE_MASK; // If a name has different scope inside than outside the comprehension, // we need to temporarily handle it with the right scope while // compiling the comprehension. If it's free in the comprehension @@ -6056,14 +6098,20 @@ compiler_visit_expr(struct compiler *c, expr_ty e) return compiler_formatted_value(c, e); /* The following exprs can be assignment targets. */ case Attribute_kind: - if (e->v.Attribute.ctx == Load && can_optimize_super_call(c, e)) { - RETURN_IF_ERROR(load_args_for_super(c, e->v.Attribute.value)); - int opcode = asdl_seq_LEN(e->v.Attribute.value->v.Call.args) ? - LOAD_SUPER_ATTR : LOAD_ZERO_SUPER_ATTR; - ADDOP_NAME(c, loc, opcode, e->v.Attribute.attr, names); - loc = update_start_location_to_match_attr(c, loc, e); - ADDOP(c, loc, NOP); - return SUCCESS; + if (e->v.Attribute.ctx == Load) { + int ret = can_optimize_super_call(c, e); + if (ret < 0) { + return ERROR; + } + if (ret) { + RETURN_IF_ERROR(load_args_for_super(c, e->v.Attribute.value)); + int opcode = asdl_seq_LEN(e->v.Attribute.value->v.Call.args) ? + LOAD_SUPER_ATTR : LOAD_ZERO_SUPER_ATTR; + ADDOP_NAME(c, loc, opcode, e->v.Attribute.attr, names); + loc = update_start_location_to_match_attr(c, loc, e); + ADDOP(c, loc, NOP); + return SUCCESS; + } } if (e->v.Attribute.value->kind == Name_kind && _PyUnicode_EqualToASCIIString(e->v.Attribute.value->v.Name.id, "self")) @@ -7296,7 +7344,8 @@ consts_dict_keys_inorder(PyObject *dict) if (consts == NULL) return NULL; while (PyDict_Next(dict, &pos, &k, &v)) { - i = PyLong_AS_LONG(v); + assert(PyLong_CheckExact(v)); + i = PyLong_AsLong(v); /* The keys of the dictionary can be tuples wrapping a constant. * (see dict_add_o and _PyCode_ConstantKey). In that case * the object we want is always second. */ diff --git a/Python/symtable.c b/Python/symtable.c index 88af37198bfba5..6acf9fa41cb180 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -531,16 +531,26 @@ long _PyST_GetSymbol(PySTEntryObject *ste, PyObject *name) { PyObject *v = PyDict_GetItemWithError(ste->ste_symbols, name); - if (!v) - return 0; - assert(PyLong_Check(v)); - return PyLong_AS_LONG(v); + if (!v) { + return PyErr_Occurred() ? -1 : 0; + } + long symbol = PyLong_AsLong(v); + if (symbol < 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_SystemError, "invalid symbol"); + } + return -1; + } + return symbol; } int _PyST_GetScope(PySTEntryObject *ste, PyObject *name) { long symbol = _PyST_GetSymbol(ste, name); + if (symbol < 0) { + return -1; + } return (symbol >> SCOPE_OFFSET) & SCOPE_MASK; } @@ -720,11 +730,14 @@ analyze_name(PySTEntryObject *ste, PyObject *scopes, PyObject *name, long flags, // global statement), we want to also treat it as a global in this scope. if (class_entry != NULL) { long class_flags = _PyST_GetSymbol(class_entry, name); + if (class_flags < 0) { + return 0; + } if (class_flags & DEF_GLOBAL) { SET_SCOPE(scopes, name, GLOBAL_EXPLICIT); return 1; } - else if (class_flags & DEF_BOUND && !(class_flags & DEF_NONLOCAL)) { + else if ((class_flags & DEF_BOUND) && !(class_flags & DEF_NONLOCAL)) { SET_SCOPE(scopes, name, GLOBAL_IMPLICIT); return 1; } @@ -771,6 +784,9 @@ is_free_in_any_child(PySTEntryObject *entry, PyObject *key) PySTEntryObject *child_ste = (PySTEntryObject *)PyList_GET_ITEM( entry->ste_children, i); long scope = _PyST_GetScope(child_ste, key); + if (scope < 0) { + return -1; + } if (scope == FREE) { return 1; } @@ -789,7 +805,10 @@ inline_comprehension(PySTEntryObject *ste, PySTEntryObject *comp, while (PyDict_Next(comp->ste_symbols, &pos, &k, &v)) { // skip comprehension parameter - long comp_flags = PyLong_AS_LONG(v); + long comp_flags = PyLong_AsLong(v); + if (comp_flags == -1 && PyErr_Occurred()) { + return 0; + } if (comp_flags & DEF_PARAM) { assert(_PyUnicode_EqualToASCIIString(k, ".0")); continue; @@ -830,11 +849,19 @@ inline_comprehension(PySTEntryObject *ste, PySTEntryObject *comp, SET_SCOPE(scopes, k, scope); } else { - if (PyLong_AsLong(existing) & DEF_BOUND) { + long flags = PyLong_AsLong(existing); + if (flags == -1 && PyErr_Occurred()) { + return 0; + } + if ((flags & DEF_BOUND) && ste->ste_type != ClassBlock) { // free vars in comprehension that are locals in outer scope can // now simply be locals, unless they are free in comp children, // or if the outer scope is a class block - if (!is_free_in_any_child(comp, k) && ste->ste_type != ClassBlock) { + int ok = is_free_in_any_child(comp, k); + if (ok < 0) { + return 0; + } + if (!ok) { if (PySet_Discard(comp_free, k) < 0) { return 0; } @@ -870,9 +897,10 @@ analyze_cells(PyObject *scopes, PyObject *free, PyObject *inlined_cells) if (!v_cell) return 0; while (PyDict_Next(scopes, &pos, &name, &v)) { - long scope; - assert(PyLong_Check(v)); - scope = PyLong_AS_LONG(v); + long scope = PyLong_AsLong(v); + if (scope == -1 && PyErr_Occurred()) { + goto error; + } if (scope != LOCAL) continue; int contains = PySet_Contains(free, name); @@ -935,9 +963,10 @@ update_symbols(PyObject *symbols, PyObject *scopes, /* Update scope information for all symbols in this scope */ while (PyDict_Next(symbols, &pos, &name, &v)) { - long scope, flags; - assert(PyLong_Check(v)); - flags = PyLong_AS_LONG(v); + long flags = PyLong_AsLong(v); + if (flags == -1 && PyErr_Occurred()) { + return 0; + } int contains = PySet_Contains(inlined_cells, name); if (contains < 0) { return 0; @@ -946,8 +975,16 @@ update_symbols(PyObject *symbols, PyObject *scopes, flags |= DEF_COMP_CELL; } v_scope = PyDict_GetItemWithError(scopes, name); - assert(v_scope && PyLong_Check(v_scope)); - scope = PyLong_AS_LONG(v_scope); + if (!v_scope) { + if (!PyErr_Occurred()) { + PyErr_SetObject(PyExc_KeyError, name); + } + return 0; + } + long scope = PyLong_AsLong(v_scope); + if (scope == -1 && PyErr_Occurred()) { + return 0; + } flags |= (scope << SCOPE_OFFSET); v_new = PyLong_FromLong(flags); if (!v_new) @@ -980,7 +1017,11 @@ update_symbols(PyObject *symbols, PyObject *scopes, or global in the class scope. */ if (classflag) { - long flags = PyLong_AS_LONG(v) | DEF_FREE_CLASS; + long flags = PyLong_AsLong(v); + if (flags == -1 && PyErr_Occurred()) { + goto error; + } + flags |= DEF_FREE_CLASS; v_new = PyLong_FromLong(flags); if (!v_new) { goto error; @@ -1119,7 +1160,10 @@ analyze_block(PySTEntryObject *ste, PyObject *bound, PyObject *free, } while (PyDict_Next(ste->ste_symbols, &pos, &name, &v)) { - long flags = PyLong_AS_LONG(v); + long flags = PyLong_AsLong(v); + if (flags == -1 && PyErr_Occurred()) { + goto error; + } if (!analyze_name(ste, scopes, name, flags, bound, local, free, global, type_params, class_entry)) goto error; @@ -1407,9 +1451,12 @@ symtable_lookup_entry(struct symtable *st, PySTEntryObject *ste, PyObject *name) { PyObject *mangled = _Py_MaybeMangle(st->st_private, ste, name); if (!mangled) - return 0; + return -1; long ret = _PyST_GetSymbol(ste, mangled); Py_DECREF(mangled); + if (ret < 0) { + return -1; + } return ret; } @@ -1432,7 +1479,10 @@ symtable_add_def_helper(struct symtable *st, PyObject *name, int flag, struct _s return 0; dict = ste->ste_symbols; if ((o = PyDict_GetItemWithError(dict, mangled))) { - val = PyLong_AS_LONG(o); + val = PyLong_AsLong(o); + if (val == -1 && PyErr_Occurred()) { + goto error; + } if ((flag & DEF_PARAM) && (val & DEF_PARAM)) { /* Is it better to use 'mangled' or 'name' here? */ PyErr_Format(PyExc_SyntaxError, DUPLICATE_ARGUMENT, name); @@ -1478,16 +1528,20 @@ symtable_add_def_helper(struct symtable *st, PyObject *name, int flag, struct _s if (flag & DEF_PARAM) { if (PyList_Append(ste->ste_varnames, mangled) < 0) goto error; - } else if (flag & DEF_GLOBAL) { + } else if (flag & DEF_GLOBAL) { /* XXX need to update DEF_GLOBAL for other flags too; perhaps only DEF_FREE_GLOBAL */ - val = flag; + val = 0; if ((o = PyDict_GetItemWithError(st->st_global, mangled))) { - val |= PyLong_AS_LONG(o); + val = PyLong_AsLong(o); + if (val == -1 && PyErr_Occurred()) { + goto error; + } } else if (PyErr_Occurred()) { goto error; } + val |= flag; o = PyLong_FromLong(val); if (o == NULL) goto error; @@ -2176,6 +2230,9 @@ symtable_extend_namedexpr_scope(struct symtable *st, expr_ty e) */ if (ste->ste_comprehension) { long target_in_scope = symtable_lookup_entry(st, ste, target_name); + if (target_in_scope < 0) { + VISIT_QUIT(st, 0); + } if ((target_in_scope & DEF_COMP_ITER) && (target_in_scope & DEF_LOCAL)) { PyErr_Format(PyExc_SyntaxError, NAMED_EXPR_COMP_CONFLICT, target_name); @@ -2188,6 +2245,9 @@ symtable_extend_namedexpr_scope(struct symtable *st, expr_ty e) /* If we find a FunctionBlock entry, add as GLOBAL/LOCAL or NONLOCAL/LOCAL */ if (ste->ste_type == FunctionBlock) { long target_in_scope = symtable_lookup_entry(st, ste, target_name); + if (target_in_scope < 0) { + VISIT_QUIT(st, 0); + } if (target_in_scope & DEF_GLOBAL) { if (!symtable_add_def(st, target_name, DEF_GLOBAL, LOCATION(e))) VISIT_QUIT(st, 0); From 7905159270553830a3daa2547387a9a1a4ae9ae2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 2 Aug 2024 12:18:14 +0300 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- Python/compile.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 9a782eb2372aa5..e0472246edf153 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -517,7 +517,6 @@ dictbytype(PyObject *src, int scope_type, int flag, Py_ssize_t offset) num_keys = PyList_GET_SIZE(sorted_keys); for (key_i = 0; key_i < num_keys; key_i++) { - long vi; k = PyList_GET_ITEM(sorted_keys, key_i); v = PyDict_GetItemWithError(src, k); if (!v) { @@ -528,7 +527,7 @@ dictbytype(PyObject *src, int scope_type, int flag, Py_ssize_t offset) Py_DECREF(dest); return NULL; } - vi = PyLong_AsLong(v); + long vi = PyLong_AsLong(v); if (vi == -1 && PyErr_Occurred()) { Py_DECREF(sorted_keys); Py_DECREF(dest); @@ -638,9 +637,7 @@ compiler_set_qualname(struct compiler *c) scope = _PyST_GetScope(parent->u_ste, mangled); Py_DECREF(mangled); - if (scope < 0) { - return ERROR; - } + RETURN_IF_ERROR(scope); assert(scope != GLOBAL_IMPLICIT); if (scope == GLOBAL_EXPLICIT) force_global = 1; @@ -4650,9 +4647,7 @@ is_import_originated(struct compiler *c, expr_ty e) } long flags = _PyST_GetSymbol(SYMTABLE(c)->st_top, e->v.Name.id); - if (flags < 0) { - return ERROR; - } + RETURN_IF_ERROR(flags); return flags & DEF_IMPORT; } @@ -4672,16 +4667,12 @@ can_optimize_super_call(struct compiler *c, expr_ty attr) PyObject *super_name = e->v.Call.func->v.Name.id; // detect statically-visible shadowing of 'super' name int scope = _PyST_GetScope(SYMTABLE_ENTRY(c), super_name); - if (scope < 0) { - return -1; - } + RETURN_IF_ERROR(scope); if (scope != GLOBAL_IMPLICIT) { return 0; } scope = _PyST_GetScope(SYMTABLE(c)->st_top, super_name); - if (scope < 0) { - return -1; - } + RETURN_IF_ERROR(scope); if (scope != 0) { return 0; } @@ -4789,9 +4780,7 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) /* Check that the base object is not something that is imported */ int ret = is_import_originated(c, meth->v.Attribute.value); - if (ret < 0) { - return ERROR; - } + RETURN_IF_ERROR(ret); if (ret) { return 0; } @@ -4821,9 +4810,7 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) location loc = LOC(meth); ret = can_optimize_super_call(c, meth); - if (ret < 0) { - return ERROR; - } + RETURN_IF_ERROR(ret); if (ret) { RETURN_IF_ERROR(load_args_for_super(c, meth->v.Attribute.value)); int opcode = asdl_seq_LEN(meth->v.Attribute.value->v.Call.args) ? @@ -6100,9 +6087,7 @@ compiler_visit_expr(struct compiler *c, expr_ty e) case Attribute_kind: if (e->v.Attribute.ctx == Load) { int ret = can_optimize_super_call(c, e); - if (ret < 0) { - return ERROR; - } + RETURN_IF_ERROR(ret); if (ret) { RETURN_IF_ERROR(load_args_for_super(c, e->v.Attribute.value)); int opcode = asdl_seq_LEN(e->v.Attribute.value->v.Call.args) ? From a8ed7ec7746868e71b863d3707c314ea2ea87e38 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 3 Aug 2024 13:48:50 +0300 Subject: [PATCH 3/3] Fix merge errors and apply review suggestions. --- Python/symtable.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/Python/symtable.c b/Python/symtable.c index 7ce6e6201bcbfa..4acf762f8fca39 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -526,11 +526,15 @@ _PySymtable_LookupOptional(struct symtable *st, void *key, long _PyST_GetSymbol(PySTEntryObject *ste, PyObject *name) { - PyObject *v = PyDict_GetItemWithError(ste->ste_symbols, name); + PyObject *v; + if (PyDict_GetItemRef(ste->ste_symbols, name, &v) < 0) { + return -1; + } if (!v) { - return PyErr_Occurred() ? -1 : 0; + return 0; } long symbol = PyLong_AsLong(v); + Py_DECREF(v); if (symbol < 0) { if (!PyErr_Occurred()) { PyErr_SetString(PyExc_SystemError, "invalid symbol"); @@ -965,14 +969,15 @@ update_symbols(PyObject *symbols, PyObject *scopes, if (contains) { flags |= DEF_COMP_CELL; } - v_scope = PyDict_GetItemWithError(scopes, name); + if (PyDict_GetItemRef(scopes, name, &v_scope) < 0) { + return 0; + } if (!v_scope) { - if (!PyErr_Occurred()) { - PyErr_SetObject(PyExc_KeyError, name); - } + PyErr_SetObject(PyExc_KeyError, name); return 0; } long scope = PyLong_AsLong(v_scope); + Py_DECREF(v_scope); if (scope == -1 && PyErr_Occurred()) { return 0; } @@ -2231,7 +2236,7 @@ symtable_extend_namedexpr_scope(struct symtable *st, expr_ty e) if (ste->ste_comprehension) { long target_in_scope = symtable_lookup_entry(st, ste, target_name); if (target_in_scope < 0) { - VISIT_QUIT(st, 0); + return 0; } if ((target_in_scope & DEF_COMP_ITER) && (target_in_scope & DEF_LOCAL)) { @@ -2246,7 +2251,7 @@ symtable_extend_namedexpr_scope(struct symtable *st, expr_ty e) if (ste->ste_type == FunctionBlock) { long target_in_scope = symtable_lookup_entry(st, ste, target_name); if (target_in_scope < 0) { - VISIT_QUIT(st, 0); + return 0; } if (target_in_scope & DEF_GLOBAL) { if (!symtable_add_def(st, target_name, DEF_GLOBAL, LOCATION(e))) @@ -2661,9 +2666,6 @@ symtable_visit_params(struct symtable *st, asdl_arg_seq *args) { Py_ssize_t i; - if (!args) - return -1; - for (i = 0; i < asdl_seq_LEN(args); i++) { arg_ty arg = (arg_ty)asdl_seq_GET(args, i); if (!symtable_add_def(st, arg->arg, DEF_PARAM, LOCATION(arg))) @@ -2710,9 +2712,6 @@ symtable_visit_argannotations(struct symtable *st, asdl_arg_seq *args) { Py_ssize_t i; - if (!args) - return -1; - for (i = 0; i < asdl_seq_LEN(args); i++) { arg_ty arg = (arg_ty)asdl_seq_GET(args, i); if (arg->annotation) {