Skip to content

Commit

Permalink
Scala: allow ellipsis in for loop headers (#5661)
Browse files Browse the repository at this point in the history
* add multi-for-each headers, use them in scala translation

* add test

* changelog

* fix changelog

Co-authored-by: Yoann Padioleau <pad@r2c.dev>
  • Loading branch information
mmcqd and aryx committed Jun 30, 2022
1 parent 175e6d7 commit 6368cff
Show file tree
Hide file tree
Showing 14 changed files with 180 additions and 84 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html

## Unreleased

### Added

- Scala: ellipsis are now allowed in for loop headers (#5650)

### Fixed

- taint-mode: In some scenarios some statements were not being included in the
Expand Down
93 changes: 51 additions & 42 deletions semgrep-core/src/analyzing/AST_to_IL.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1067,48 +1067,15 @@ and stmt_aux env st =
@ [ mk_s (Loop (tok, e', st @ cont_label_s @ ss)) ]
@ break_label_s
| G.For (tok, G.ForEach (pat, tok2, e), st) ->
let cont_label_s, break_label_s, st_env =
mk_break_continue_labels env tok
in
let ss, e' = expr_with_pre_stmts env e in
let st = stmt st_env st in

let next_lval = fresh_lval env tok2 in
let hasnext_lval = fresh_lval env tok2 in
let hasnext_call =
mk_s
(Instr
(mk_i
(CallSpecial (Some hasnext_lval, (ForeachHasNext, tok2), [ e' ]))
(related_tok tok2)))
in
let next_call =
mk_s
(Instr
(mk_i
(CallSpecial (Some next_lval, (ForeachNext, tok2), [ e' ]))
(related_tok tok2)))
in
(* same semantic? or need to take Ref? or pass lval
* directly in next_call instead of using intermediate next_lval?
*)
let assign_st =
pattern_assign_statements env
(mk_e (Fetch next_lval) (related_tok tok2))
~eorig:(related_tok tok2) pat
in
let cond = mk_e (Fetch hasnext_lval) (related_tok tok2) in

(ss @ [ hasnext_call ])
@ [
mk_s
(Loop
( tok,
cond,
[ next_call ] @ assign_st @ st @ cont_label_s
@ [ (* ss @ ?*) hasnext_call ] ));
]
@ break_label_s
for_each env tok (pat, tok2, e) st
| G.For (_, G.MultiForEach [], st) -> stmt env st
| G.For (_, G.MultiForEach (FEllipsis _ :: _), _) -> sgrep_construct (G.S st)
| G.For (tok, G.MultiForEach (FECond (fr, tok2, e) :: for_eachs), st) ->
let loop = G.For (tok, G.MultiForEach for_eachs, st) |> G.s in
let st = G.If (tok2, Cond e, loop, None) |> G.s in
for_each env tok fr st
| G.For (tok, G.MultiForEach (FE fr :: for_eachs), st) ->
for_each env tok fr (G.For (tok, G.MultiForEach for_eachs, st) |> G.s)
| G.For (tok, G.ForClassic (xs, eopt1, eopt2), st) ->
let cont_label_s, break_label_s, st_env =
mk_break_continue_labels env tok
Expand Down Expand Up @@ -1252,6 +1219,48 @@ and stmt_aux env st =
| G.OtherStmtWithStmt _ ->
todo (G.S st)

and for_each env tok (pat, tok2, e) st =
let cont_label_s, break_label_s, st_env = mk_break_continue_labels env tok in
let ss, e' = expr_with_pre_stmts env e in
let st = stmt st_env st in

let next_lval = fresh_lval env tok2 in
let hasnext_lval = fresh_lval env tok2 in
let hasnext_call =
mk_s
(Instr
(mk_i
(CallSpecial (Some hasnext_lval, (ForeachHasNext, tok2), [ e' ]))
(related_tok tok2)))
in
let next_call =
mk_s
(Instr
(mk_i
(CallSpecial (Some next_lval, (ForeachNext, tok2), [ e' ]))
(related_tok tok2)))
in
(* same semantic? or need to take Ref? or pass lval
* directly in next_call instead of using intermediate next_lval?
*)
let assign_st =
pattern_assign_statements env
(mk_e (Fetch next_lval) (related_tok tok2))
~eorig:(related_tok tok2) pat
in
let cond = mk_e (Fetch hasnext_lval) (related_tok tok2) in

(ss @ [ hasnext_call ])
@ [
mk_s
(Loop
( tok,
cond,
[ next_call ] @ assign_st @ st @ cont_label_s
@ [ (* ss @ ?*) hasnext_call ] ));
]
@ break_label_s

(* TODO: Maybe this and the following function could be merged *)
and switch_expr_and_cases_to_exp env tok switch_expr_orig switch_expr cases =
(* If there is a scrutinee, the cases are expressions we need to check for equality with the scrutinee *)
Expand Down
17 changes: 13 additions & 4 deletions semgrep-core/src/core/ast/AST_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1052,16 +1052,25 @@ and for_header =
* expr option (* cond *)
* expr option (* next *)
(* newvar: *)
| ForEach of
pattern
* tok (* 'in' Python, 'range' Go, 'as' PHP, '' Java *)
* expr (* pattern 'in' expr *)
| ForEach of for_each
(* Scala *)
| MultiForEach of multi_for_each list
(* Lua. todo: merge with ForEach? *)
(* pattern 'in' expr *)
| ForIn of for_var_or_expr list (* init *) * expr list
(* sgrep: *)
| ForEllipsis of (* ... *) tok

and for_each =
pattern
* tok (* 'in' Python, 'range' Go, 'as' PHP, '' Java, '<-' Scala *)
* expr (* pattern 'in' expr *)

and multi_for_each =
| FE of for_each
| FECond of for_each * tok * expr
| FEllipsis of tok

and for_var_or_expr =
(* newvar: *)
| ForInitVar of entity * variable_definition
Expand Down
14 changes: 10 additions & 4 deletions semgrep-core/src/core/ast/Map_AST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -753,17 +753,23 @@ let (mk_visitor : visitor_in -> visitor_out) =
and v2 = map_of_option map_expr v2
and v3 = map_of_option map_expr v3 in
ForClassic (v1, v2, v3)
| ForEach (v1, t, v2) ->
let t = map_tok t in
let v1 = map_pattern v1 and v2 = map_expr v2 in
ForEach (v1, t, v2)
| ForEach v1 -> ForEach (map_for_each v1)
| MultiForEach v1 -> MultiForEach (map_of_list map_multi_for_each v1)
| ForEllipsis t ->
let t = map_tok t in
ForEllipsis t
| ForIn (v1, v2) ->
let v1 = map_of_list map_for_var_or_expr v1
and v2 = map_of_list map_expr v2 in
ForIn (v1, v2)
and map_for_each (v1, t, v2) =
let t = map_tok t in
let v1 = map_pattern v1 and v2 = map_expr v2 in
(v1, t, v2)
and map_multi_for_each = function
| FE v1 -> FE (map_for_each v1)
| FECond (v1, t, v2) -> FECond (map_for_each v1, map_tok t, map_expr v2)
| FEllipsis t -> FEllipsis (map_tok t)
and map_for_var_or_expr = function
| ForInitVar (v1, v2) ->
let v1 = map_entity v1 and v2 = map_variable_definition v2 in
Expand Down
27 changes: 23 additions & 4 deletions semgrep-core/src/core/ast/Meta_AST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -854,10 +854,12 @@ and vof_for_header = function
and v2 = OCaml.vof_option vof_expr v2
and v3 = OCaml.vof_option vof_expr v3 in
OCaml.VSum ("ForClassic", [ v1; v2; v3 ])
| ForEach (v1, t, v2) ->
let t = vof_tok t in
let v1 = vof_pattern v1 and v2 = vof_expr v2 in
OCaml.VSum ("ForEach", [ v1; t; v2 ])
| ForEach v1 ->
let v1 = vof_for_each v1 in
OCaml.VSum ("ForEach", [ v1 ])
| MultiForEach v1 ->
let v1 = OCaml.vof_list vof_multi_for_each v1 in
OCaml.VSum ("MultiForEach", [ v1 ])
| ForEllipsis t ->
let t = vof_tok t in
OCaml.VSum ("ForEllipsis", [ t ])
Expand All @@ -866,6 +868,23 @@ and vof_for_header = function
and v2 = OCaml.vof_list vof_expr v2 in
OCaml.VSum ("ForIn", [ v1; v2 ])

and vof_for_each (v1, t, v2) =
let t = vof_tok t in
let v1 = vof_pattern v1 and v2 = vof_expr v2 in
OCaml.VTuple [ v1; t; v2 ]

and vof_multi_for_each = function
| FE v1 ->
let v1 = vof_for_each v1 in
OCaml.VSum ("FE", [ v1 ])
| FECond (v1, t, v2) ->
let t = vof_tok t in
let v1 = vof_for_each v1 and v2 = vof_expr v2 in
OCaml.VSum ("FECond", [ v1; t; v2 ])
| FEllipsis t ->
let t = vof_tok t in
OCaml.VSum ("FEllipsis", [ t ])

and vof_for_var_or_expr = function
| ForInitVar (v1, v2) ->
let v1 = vof_entity v1 and v2 = vof_variable_definition v2 in
Expand Down
12 changes: 10 additions & 2 deletions semgrep-core/src/core/ast/Pretty_print_AST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,20 @@ and for_stmt env level (for_tok, hdr, s) =
in
let opt_expr = opt (fun x -> expr env x) in
let hdr_str =
let for_each (pat, tok, e) =
F.sprintf "%s %s %s" (pattern env pat) (token "in" tok) (expr env e)
in
let multi_for_each = function
| FE fe -> for_each fe
| FECond (fe, _, e) -> F.sprintf "%s if %s" (for_each fe) (expr env e)
| FEllipsis _ -> "..."
in
match hdr with
| ForClassic (init, cond, next) ->
F.sprintf "%s; %s; %s" (show_init_list init) (opt_expr cond)
(opt_expr next)
| ForEach (pat, tok, e) ->
F.sprintf "%s %s %s" (pattern env pat) (token "in" tok) (expr env e)
| ForEach (pat, tok, e) -> for_each (pat, tok, e)
| MultiForEach fors -> String.concat ";" (Common.map multi_for_each fors)
| ForEllipsis tok -> token "..." tok
| ForIn (init, exprs) ->
F.sprintf "%s %s %s" (show_init_list init) "in"
Expand Down
17 changes: 13 additions & 4 deletions semgrep-core/src/core/ast/Visitor_AST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -846,14 +846,23 @@ let (mk_visitor :
and v2 = v_option v_expr v2
and v3 = v_option v_expr v3 in
()
| ForEach (v1, t, v2) ->
let t = v_tok t in
let v1 = v_pattern v1 and v2 = v_expr v2 in
()
| ForEach v1 -> v_for_each v1
| MultiForEach v1 -> v_list v_multi_for_each v1
| ForEllipsis t -> v_tok t
| ForIn (v1, v2) ->
let v1 = v_list v_for_var_or_expr v1 and v2 = v_list v_expr v2 in
()
and v_for_each (v1, t, v2) =
let t = v_tok t in
let v1 = v_pattern v1 and v2 = v_expr v2 in
()
and v_multi_for_each = function
| FE v1 -> v_for_each v1
| FECond (v1, t, v2) ->
v_for_each v1;
v_tok t;
v_expr v2
| FEllipsis t -> v_tok t
and v_for_var_or_expr = function
| ForInitVar (v1, v2) ->
let v1 = v_entity v1 and v2 = v_variable_definition v2 in
Expand Down
5 changes: 5 additions & 0 deletions semgrep-core/src/experiments/api/AST_generic_to_v0.ml
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,11 @@ and map_for_header = function
let t = map_tok t in
let v1 = map_pattern v1 and v2 = map_expr v2 in
`ForEach (v1, t, v2)
| MultiForEach (FE (v1, t, v2) :: _) ->
let t = map_tok t in
let v1 = map_pattern v1 and v2 = map_expr v2 in
`ForEach (v1, t, v2)
| MultiForEach _ -> failwith "todo - MultiForEach"
| ForEllipsis t ->
let t = map_tok t in
`ForEllipsis t
Expand Down
24 changes: 21 additions & 3 deletions semgrep-core/src/matching/Generic_vs_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2250,9 +2250,13 @@ and m_for_header a b =
| G.ForClassic (a1, a2, a3), B.ForClassic (b1, b2, b3) ->
(m_list m_for_var_or_expr) a1 b1 >>= fun () ->
m_option m_expr a2 b2 >>= fun () -> m_option m_expr a3 b3
| G.ForEach (a1, at, a2), B.ForEach (b1, bt, b2) ->
m_pattern a1 b1 >>= fun () ->
m_tok at bt >>= fun () -> m_expr a2 b2
| G.ForEach a1, B.ForEach b1 -> m_for_each a1 b1
| G.MultiForEach a1, B.MultiForEach b1 ->
m_list_with_dots ~less_is_ok:false m_multi_for_each
(function
| G.FEllipsis _ -> true
| _ -> false)
a1 b1
| G.ForIn (a1, a2), B.ForIn (b1, b2) ->
(m_list m_for_var_or_expr) a1 b1 >>= fun () ->
m_list_with_dots m_expr
Expand All @@ -2262,9 +2266,23 @@ and m_for_header a b =
~less_is_ok:false a2 b2
| G.ForClassic _, _
| G.ForEach _, _
| G.MultiForEach _, _
| G.ForIn _, _ ->
fail ()

and m_for_each (a1, at, a2) (b1, bt, b2) =
m_pattern a1 b1 >>= fun () ->
m_tok at bt >>= fun () -> m_expr a2 b2

and m_multi_for_each a b =
match (a, b) with
| G.FE a1, B.FE b1 -> m_for_each a1 b1
| G.FECond (a1, at, a2), B.FECond (b1, bt, b2) ->
m_for_each a1 b1 >>= fun () ->
m_tok at bt >>= fun () -> m_expr a2 b2
| G.FEllipsis _, _ -> return ()
| _ -> fail ()

and m_block a b =
match (a.s, b.s) with
| G.Block _, B.Block _ -> m_stmt a b
Expand Down
7 changes: 7 additions & 0 deletions semgrep-core/src/matching/SubAST_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ let subexprs_of_stmt_kind = function
| Some cond -> [ H.cond_to_expr cond ])
| Return (_, eopt, _) -> Option.to_list eopt
(* n *)
| For (_, MultiForEach es, _) ->
es
|> List.filter_map (function
| FE (_, _, e) -> Some [ e ]
| FECond ((_, _, e1), _, e2) -> Some [ e1; e2 ]
| FEllipsis _ -> None)
|> List.concat
| For (_, ForClassic (xs, eopt1, eopt2), _) ->
(xs
|> Common.map_filter (function
Expand Down
33 changes: 13 additions & 20 deletions semgrep-core/src/parsing/pfff/scala_to_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -562,24 +562,7 @@ and v_stmt = function
let v1 = v_tok v1
and v2 = v2 |> G.unbracket |> v_enumerators
and v3 = v_for_body v3 in
List.fold_right
(fun gen stmt ->
match gen with
| `G (pat, tok, e, guards) ->
G.For
( v1,
G.ForEach (pat, tok, e),
List.fold_right
(fun (g_tok, g_e) stmt ->
G.If (g_tok, G.Cond g_e, stmt, None) |> G.s)
guards stmt )
|> G.s
| `GIf guards ->
List.fold_right
(fun (g_tok, g_e) stmt ->
G.If (g_tok, G.Cond g_e, stmt, None) |> G.s)
guards stmt)
v2 v3
G.For (v1, G.MultiForEach v2, v3) |> G.s
| Return (v1, v2) ->
let v1 = v_tok v1 and v2 = v_option v_expr v2 in
G.Return (v1, v2, G.sc) |> G.s
Expand All @@ -601,8 +584,18 @@ and v_stmt = function
and v_enumerators v = v_list v_enumerator v

and v_enumerator = function
| G v1 -> `G (v_generator v1)
| GIf v1 -> `GIf (v_list v_guard v1)
| G v1 -> (
let pat, tok, e, guards = v_generator v1 in
match guards with
| [] -> G.FE (pat, tok, e)
| (tok2, cond) :: guards ->
let conds =
List.fold_left
(fun e (tok, c) -> G.special (G.Op G.And, tok) [ e; c ])
cond guards
in
G.FECond ((pat, tok, e), tok2, conds))
| GEllipsis tok -> G.FEllipsis tok

and v_generator
{
Expand Down
2 changes: 1 addition & 1 deletion semgrep-core/src/pfff
Submodule pfff updated from f80b02 to 71bbc1
8 changes: 8 additions & 0 deletions semgrep-core/tests/scala/for_loop_ellipsis.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//ERROR: match
val x = for (w <- u if foo) yield w

//ERROR: match
val y = for (a <- b; w <- u if foo; c <- d) yield w

//OK:
val z = for (a <- b; w <- u if bar) yield w
1 change: 1 addition & 0 deletions semgrep-core/tests/scala/for_loop_ellipsis.sgrep
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
for (...; $X <- $Y if foo; ...) yield ...

0 comments on commit 6368cff

Please sign in to comment.