Skip to content

Commit

Permalink
[Python] set the right scope for comprehension variables
Browse files Browse the repository at this point in the history
This closes #4260

test plan:
test files included and also see how the same variables "v" now
resolve to the same Local each time
```
$ semgrep-core -lang py -dump_named_ast python/metavar_iterator.py
Pr(
  [DefStmt(
     ({
       name=EN(
              Id(("test", ()),
                {id_hidden=false; id_resolved=Ref(None); id_type=Ref(
                 None); id_constness=Ref(None); }));
       attrs=[]; tparams=[]; },
      FuncDef(
        {fkind=(Function, ()); fparams=[]; frettype=None;
         fbody=FBStmt(
                 Block(
                   [ExprStmt(
                      Comprehension(List,
                        (N(
                           Id(("v", ()),
                             {id_hidden=false;
                              id_resolved=Ref(Some((Local, 1)));
                              id_type=Ref(None); id_constness=Ref(None); })),
                         [CompFor((),
                            PatId(("v", ()),
                              {id_hidden=false;
                               id_resolved=Ref(Some((Local, 1)));
                               id_type=Ref(None); id_constness=Ref(None); }),
                            (),
                            N(
                              Id(("iterator", ()),
                                {id_hidden=false; id_resolved=Ref(None);
                                 id_type=Ref(None); id_constness=Ref(
                                 None); })))])), ());
                    ExprStmt(
                      Comprehension(List,
                        (Call(
                           N(
                             Id(("f", ()),
                               {id_hidden=false; id_resolved=Ref(None);
                                id_type=Ref(None); id_constness=Ref(None); })),
                           [Arg(
                              N(
                                Id(("v", ()),
                                  {id_hidden=false;
                                   id_resolved=Ref(Some((Local, 2)));
                                   id_type=Ref(None); id_constness=Ref(
                                   None); })))]),
                         [CompFor((),
                            PatId(("v", ()),
                              {id_hidden=false;
                               id_resolved=Ref(Some((Local, 2)));
                               id_type=Ref(None); id_constness=Ref(None); }),
                            (),
                            N(
                              Id(("iterator", ()),
                                {id_hidden=false; id_resolved=Ref(None);
                                 id_type=Ref(None); id_constness=Ref(
                                 None); })))])), ())]));
         })))])
```
  • Loading branch information
aryx committed Dec 8, 2021
1 parent 199c47b commit 60511a7
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 14 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Expand Up @@ -4,6 +4,12 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html

## Unreleased

### Added
- New language Solidity with experimental support.

## Fixed
- Python: set the right scope for comprehension variables (#4260)

## [0.76.1](https://github.com/returntocorp/semgrep/releases/tag/v0.76.1) - 12-07-2021

## Fixed
Expand Down
28 changes: 20 additions & 8 deletions semgrep-core/src/analyzing/Naming_AST.ml
Expand Up @@ -19,6 +19,9 @@ module H = AST_generic_helpers

let logger = Logging.get_logger [ __MODULE__ ]

(* see error() below *)
let error_report = false

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
Expand Down Expand Up @@ -86,8 +89,6 @@ let logger = Logging.get_logger [ __MODULE__ ]
* See set_resolved()
*
* TODO:
* - resolve Comprehension, which can introduce bindings locally
* (kpattern might do its job, but the scope might be too big)
* - generalize the original "resolvers":
* * resolve_go.ml
* * resolve_python.ml
Expand Down Expand Up @@ -172,7 +173,8 @@ let default_scopes () = { global = ref []; blocks = ref []; imported = ref [] }
let with_new_function_scope params scopes f =
Common.save_excursion scopes.blocks (params :: !(scopes.blocks)) f

let _with_new_block_scope _params _lang _scopes _f = raise Todo
let with_new_block_scope scopes f =
Common.save_excursion scopes.blocks ([] :: !(scopes.blocks)) f

let add_ident_current_scope (s, _) resolved scopes =
match !(scopes.blocks) with
Expand All @@ -191,6 +193,7 @@ let _add_ident_function_scope _id _resolved _scopes = raise Todo

let untyped_ent name = { entname = name; enttype = None }

(* see also lookup_scope_opt below taking as a parameter the environment *)
let rec lookup s xxs =
match xxs with
| [] -> None
Expand Down Expand Up @@ -295,7 +298,6 @@ let lookup_scope_opt (s, _) env =
(*****************************************************************************)
(* Error management *)
(*****************************************************************************)
let error_report = false

let error tok s =
if error_report then raise (Parse_info.Other_error (s, tok))
Expand Down Expand Up @@ -347,11 +349,10 @@ let params_of_parameters env xs =
| _ -> None)

let declare_var env lang id id_info ~explicit vinit vtype =
(* name resolution *)
let sid = H.gensym () in
(* for the type, we use the (optional) type in vtype, or, if we can infer *)
(* the type of the expression vinit (literal or id), we use that as a type *)
(* useful when the type is not given, e.g. in Go: `var x = 2` *)
(* for the type, we use the (optional) type in vtype, or, if we can infer
* the type of the expression vinit (literal or id), we use that as a type
* useful when the type is not given, e.g. in Go: `var x = 2` *)
let resolved_type = Typing.get_resolved_type lang (vinit, vtype) in
let name_kind, add_ident_to_its_scope =
(* In JS/TS an assignment to a variable that has not been
Expand Down Expand Up @@ -559,6 +560,7 @@ let resolve lang prog =
(fun (k, _vout) x ->
let _t, exn, _st = x in
(match exn with
(* TODO: we should create a new block scope *)
| CatchParam { pname = Some id; pinfo = id_info; _ }
when is_resolvable_name_ctx env lang ->
declare_var env lang id id_info ~explicit:true None None
Expand Down Expand Up @@ -628,6 +630,8 @@ let resolve lang prog =
| IdQualified _ -> ());
V.kexpr =
(fun (k, vout) x ->
(* ugly: hack. If we use a classic recursive-with-env visitor,
* we would not need this *)
let recurse = ref true in
(match x.e with
(* Go: This is `x := E`, a single-variable short variable declaration.
Expand Down Expand Up @@ -701,6 +705,14 @@ let resolve lang prog =
| _ ->
let s, tok = id in
error tok (spf "could not find '%s' field in environment" s))
| Comprehension (_op, (_l, (e, xs), _r)) ->
(* Actually in Python2, no new scope was created, so iterator vars
* could leak in the outer scope. This was fixed in Python3. *)
with_new_block_scope env.names (fun () ->
(* first visit xs, then e *)
xs |> List.iter (fun x -> vout (ForOrIfComp x));
vout (E e));
recurse := false
| _ -> ());
if !recurse then k x);
V.ktype_ =
Expand Down
1 change: 1 addition & 0 deletions semgrep-core/src/api/AST_generic_to_v1.ml
Expand Up @@ -1163,6 +1163,7 @@ and map_program v = map_of_list map_item v

and map_any x : B.any =
match x with
| ForOrIfComp _
| Tp _
| Ta _ ->
failwith "TODO"
Expand Down
6 changes: 4 additions & 2 deletions semgrep-core/src/core/ast/AST_generic.ml
Expand Up @@ -546,9 +546,10 @@ and container_operator =
*)
| Tuple

(* For Python/HCL (and Haskell later). The 'expr' is a 'Tuple' to
(* For Python/HCL (and Haskell later). The 'expr' can be a 'Tuple' to
* represent a Key/Value pair (like in Container). See keyval() below.
* newscope:
* newscope: for_or_if_comp introduce new local variables whose scope
* is just the first expr.
*)
and comprehension = expr * for_or_if_comp list

Expand Down Expand Up @@ -1709,6 +1710,7 @@ and any =
| Modn of module_name
| Ce of catch_exn
| Cs of case
| ForOrIfComp of for_or_if_comp
(* todo: get rid of some? *)
| ModDk of module_definition_kind
| En of entity
Expand Down
3 changes: 3 additions & 0 deletions semgrep-core/src/core/ast/Map_AST.ml
Expand Up @@ -1128,6 +1128,9 @@ let (mk_visitor : visitor_in -> visitor_out) =
let v3 = map_expr v3 in
PartialSingleField (v1, v2, v3)
and map_any = function
| ForOrIfComp v1 ->
let v1 = map_for_or_if_comp v1 in
ForOrIfComp v1
| Tp v1 ->
let v1 = map_type_parameter v1 in
Tp v1
Expand Down
3 changes: 3 additions & 0 deletions semgrep-core/src/core/ast/Meta_AST.ml
Expand Up @@ -1346,6 +1346,9 @@ and vof_partial = function
OCaml.VSum ("PartialSingleField", [ v1; v2; v3 ])

and vof_any = function
| ForOrIfComp v1 ->
let v1 = vof_for_or_if_comp v1 in
OCaml.VSum ("ForOrIfComp", [ v1 ])
| Tp v1 ->
let v1 = vof_type_parameter v1 in
OCaml.VSum ("Tp", [ v1 ])
Expand Down
1 change: 1 addition & 0 deletions semgrep-core/src/core/ast/Visitor_AST.ml
Expand Up @@ -1258,6 +1258,7 @@ let (mk_visitor :
v_id_info v2
and v_program v = v_stmts v
and v_any = function
| ForOrIfComp v1 -> v_for_or_if_comp v1
| Tp v1 -> v_type_parameter v1
| Ta v1 -> v_type_argument v1
| Cs v1 -> v_case v1
Expand Down
2 changes: 2 additions & 0 deletions semgrep-core/src/matching/Generic_vs_generic.ml
Expand Up @@ -2835,6 +2835,7 @@ and m_any a b =
| G.Pr a1, B.Pr b1 -> m_program a1 b1
| G.I a1, B.I b1 -> m_ident a1 b1
| G.Lbli a1, B.Lbli b1 -> m_label_ident a1 b1
| G.ForOrIfComp a1, B.ForOrIfComp b1 -> m_for_or_if_comp a1 b1
| G.I _, _
| G.Modn _, _
| G.Di _, _
Expand Down Expand Up @@ -2863,6 +2864,7 @@ and m_any a b =
| G.TodoK _, _
| G.Partial _, _
| G.Args _, _
| G.ForOrIfComp _, _
| G.Anys _, _
| G.Str _, _ ->
fail ()
13 changes: 9 additions & 4 deletions semgrep-core/src/matching/Matching_generic.ml
Expand Up @@ -266,19 +266,24 @@ let rec equal_ast_binded_code (config : Config_semgrep.t) (a : MV.mvalue)
* - id_constness (see the special @equal for id_constness)
*)
MV.Structural.equal_mvalue a b
(* TODO still needed now that we have the better MV.Id of id_info? *)
| MV.Id _, MV.E { e = G.N (G.Id (b_id, b_id_info)); _ } ->
(* TODO still needed now that we have the better MV.Id of id_info? *)
(* TOFIX: regression if remove this code *)
(* Allow identifier nodes to match pure identifier expressions *)

(* You should prefer to add metavar as expression (G.E), not id (G.I),
* (see Generic_vs_generic.m_ident_and_id_info_add_in_env_Expr)
* but in some cases you have no choice and you need to match an expression
* but in some cases you have no choice and you need to match an expr
* metavar with an id metavar.
* For example, we want the pattern 'const $X = foo.$X' to match 'const bar = foo.bar'
* (this is useful in the Javascript transpilation context of complex pattern parameter).
* For example, we want the pattern 'const $X = foo.$X' to match
* 'const bar = foo.bar'
* (this is useful in the Javascript transpilation context of
* complex pattern parameter).
*)
equal_ast_binded_code config a (MV.Id (b_id, Some b_id_info))
(* TODO: we should get rid of that too, we should properly bind to MV.N *)
| MV.E { e = G.N (G.Id (a_id, a_id_info)); _ }, MV.Id _ ->
equal_ast_binded_code config (MV.Id (a_id, Some a_id_info)) b
| _, _ -> false
in

Expand Down
8 changes: 8 additions & 0 deletions semgrep-core/tests/python/metavar_iterator.py
@@ -0,0 +1,8 @@
def test():

#ERROR: match
[v for v in iterator]

#ok:redundant-iterator
[f(v) for v in iterator]

1 change: 1 addition & 0 deletions semgrep-core/tests/python/metavar_iterator.sgrep
@@ -0,0 +1 @@
[$X for $X in $ITERATOR]

0 comments on commit 60511a7

Please sign in to comment.