Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scala: allow ellipsis in for loop headers #5661

Merged
merged 6 commits into from
Jun 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html

## Unreleased

### Added

- Scala: ellipsis are now allowed in for loop headers (#5650)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

During the "changelog meeting" this week, Bruno said he is now the one who is looking at the changelog to help producing the release notes, and that he thinks we don't generally write good changelog entries (that allow him to do that job easily). I think he will have problems understanding the purpose of this one.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can ask him directly, but my understanding is that he would like to know that this change is to allow Scala users to use ellipsis to write patterns matching a sequence of nested for loops. He actually emphasized a lot the what, why, and how but I'm not sure how that would be instantiated in this particular case.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think an example would be nice "(e.g., you can write a pattern like for(...; $X <- bar(); ...) { ... }".
Otherwise I feel the associated issue (#5650) mentioned in the changelog can help answer the what/why/how.


### Fixed

- taint-mode: In some scenarios some statements were not being included in the
Expand Down
93 changes: 51 additions & 42 deletions semgrep-core/src/analyzing/AST_to_IL.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1067,48 +1067,15 @@ and stmt_aux env st =
@ [ mk_s (Loop (tok, e', st @ cont_label_s @ ss)) ]
@ break_label_s
| G.For (tok, G.ForEach (pat, tok2, e), st) ->
let cont_label_s, break_label_s, st_env =
mk_break_continue_labels env tok
in
let ss, e' = expr_with_pre_stmts env e in
let st = stmt st_env st in

let next_lval = fresh_lval env tok2 in
let hasnext_lval = fresh_lval env tok2 in
let hasnext_call =
mk_s
(Instr
(mk_i
(CallSpecial (Some hasnext_lval, (ForeachHasNext, tok2), [ e' ]))
(related_tok tok2)))
in
let next_call =
mk_s
(Instr
(mk_i
(CallSpecial (Some next_lval, (ForeachNext, tok2), [ e' ]))
(related_tok tok2)))
in
(* same semantic? or need to take Ref? or pass lval
* directly in next_call instead of using intermediate next_lval?
*)
let assign_st =
pattern_assign_statements env
(mk_e (Fetch next_lval) (related_tok tok2))
~eorig:(related_tok tok2) pat
in
let cond = mk_e (Fetch hasnext_lval) (related_tok tok2) in

(ss @ [ hasnext_call ])
@ [
mk_s
(Loop
( tok,
cond,
[ next_call ] @ assign_st @ st @ cont_label_s
@ [ (* ss @ ?*) hasnext_call ] ));
]
@ break_label_s
for_each env tok (pat, tok2, e) st
| G.For (_, G.MultiForEach [], st) -> stmt env st
| G.For (_, G.MultiForEach (FEllipsis _ :: _), _) -> sgrep_construct (G.S st)
| G.For (tok, G.MultiForEach (FECond (fr, tok2, e) :: for_eachs), st) ->
let loop = G.For (tok, G.MultiForEach for_eachs, st) |> G.s in
let st = G.If (tok2, Cond e, loop, None) |> G.s in
for_each env tok fr st
| G.For (tok, G.MultiForEach (FE fr :: for_eachs), st) ->
for_each env tok fr (G.For (tok, G.MultiForEach for_eachs, st) |> G.s)
| G.For (tok, G.ForClassic (xs, eopt1, eopt2), st) ->
let cont_label_s, break_label_s, st_env =
mk_break_continue_labels env tok
Expand Down Expand Up @@ -1252,6 +1219,48 @@ and stmt_aux env st =
| G.OtherStmtWithStmt _ ->
todo (G.S st)

and for_each env tok (pat, tok2, e) st =
let cont_label_s, break_label_s, st_env = mk_break_continue_labels env tok in
let ss, e' = expr_with_pre_stmts env e in
let st = stmt st_env st in

let next_lval = fresh_lval env tok2 in
let hasnext_lval = fresh_lval env tok2 in
let hasnext_call =
mk_s
(Instr
(mk_i
(CallSpecial (Some hasnext_lval, (ForeachHasNext, tok2), [ e' ]))
(related_tok tok2)))
in
let next_call =
mk_s
(Instr
(mk_i
(CallSpecial (Some next_lval, (ForeachNext, tok2), [ e' ]))
(related_tok tok2)))
in
(* same semantic? or need to take Ref? or pass lval
* directly in next_call instead of using intermediate next_lval?
*)
let assign_st =
pattern_assign_statements env
(mk_e (Fetch next_lval) (related_tok tok2))
~eorig:(related_tok tok2) pat
in
let cond = mk_e (Fetch hasnext_lval) (related_tok tok2) in

(ss @ [ hasnext_call ])
@ [
mk_s
(Loop
( tok,
cond,
[ next_call ] @ assign_st @ st @ cont_label_s
@ [ (* ss @ ?*) hasnext_call ] ));
]
@ break_label_s

(* TODO: Maybe this and the following function could be merged *)
and switch_expr_and_cases_to_exp env tok switch_expr_orig switch_expr cases =
(* If there is a scrutinee, the cases are expressions we need to check for equality with the scrutinee *)
Expand Down
17 changes: 13 additions & 4 deletions semgrep-core/src/core/ast/AST_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1052,16 +1052,25 @@ and for_header =
* expr option (* cond *)
* expr option (* next *)
(* newvar: *)
| ForEach of
pattern
* tok (* 'in' Python, 'range' Go, 'as' PHP, '' Java *)
* expr (* pattern 'in' expr *)
| ForEach of for_each
(* Scala *)
| MultiForEach of multi_for_each list
(* Lua. todo: merge with ForEach? *)
(* pattern 'in' expr *)
| ForIn of for_var_or_expr list (* init *) * expr list
(* sgrep: *)
| ForEllipsis of (* ... *) tok

and for_each =
pattern
* tok (* 'in' Python, 'range' Go, 'as' PHP, '' Java, '<-' Scala *)
* expr (* pattern 'in' expr *)

and multi_for_each =
| FE of for_each
| FECond of for_each * tok * expr
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe in comment you can put an example, like x <- xs if x > 2

| FEllipsis of tok
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I usually add a semgrep-ext: comment before those constructs that are Semgrep specific.


and for_var_or_expr =
(* newvar: *)
| ForInitVar of entity * variable_definition
Expand Down
14 changes: 10 additions & 4 deletions semgrep-core/src/core/ast/Map_AST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -753,17 +753,23 @@ let (mk_visitor : visitor_in -> visitor_out) =
and v2 = map_of_option map_expr v2
and v3 = map_of_option map_expr v3 in
ForClassic (v1, v2, v3)
| ForEach (v1, t, v2) ->
let t = map_tok t in
let v1 = map_pattern v1 and v2 = map_expr v2 in
ForEach (v1, t, v2)
| ForEach v1 -> ForEach (map_for_each v1)
| MultiForEach v1 -> MultiForEach (map_of_list map_multi_for_each v1)
| ForEllipsis t ->
let t = map_tok t in
ForEllipsis t
| ForIn (v1, v2) ->
let v1 = map_of_list map_for_var_or_expr v1
and v2 = map_of_list map_expr v2 in
ForIn (v1, v2)
and map_for_each (v1, t, v2) =
let t = map_tok t in
let v1 = map_pattern v1 and v2 = map_expr v2 in
(v1, t, v2)
and map_multi_for_each = function
| FE v1 -> FE (map_for_each v1)
| FECond (v1, t, v2) -> FECond (map_for_each v1, map_tok t, map_expr v2)
| FEllipsis t -> FEllipsis (map_tok t)
and map_for_var_or_expr = function
| ForInitVar (v1, v2) ->
let v1 = map_entity v1 and v2 = map_variable_definition v2 in
Expand Down
27 changes: 23 additions & 4 deletions semgrep-core/src/core/ast/Meta_AST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -854,10 +854,12 @@ and vof_for_header = function
and v2 = OCaml.vof_option vof_expr v2
and v3 = OCaml.vof_option vof_expr v3 in
OCaml.VSum ("ForClassic", [ v1; v2; v3 ])
| ForEach (v1, t, v2) ->
let t = vof_tok t in
let v1 = vof_pattern v1 and v2 = vof_expr v2 in
OCaml.VSum ("ForEach", [ v1; t; v2 ])
| ForEach v1 ->
let v1 = vof_for_each v1 in
OCaml.VSum ("ForEach", [ v1 ])
| MultiForEach v1 ->
let v1 = OCaml.vof_list vof_multi_for_each v1 in
OCaml.VSum ("MultiForEach", [ v1 ])
| ForEllipsis t ->
let t = vof_tok t in
OCaml.VSum ("ForEllipsis", [ t ])
Expand All @@ -866,6 +868,23 @@ and vof_for_header = function
and v2 = OCaml.vof_list vof_expr v2 in
OCaml.VSum ("ForIn", [ v1; v2 ])

and vof_for_each (v1, t, v2) =
let t = vof_tok t in
let v1 = vof_pattern v1 and v2 = vof_expr v2 in
OCaml.VTuple [ v1; t; v2 ]

and vof_multi_for_each = function
| FE v1 ->
let v1 = vof_for_each v1 in
OCaml.VSum ("FE", [ v1 ])
| FECond (v1, t, v2) ->
let t = vof_tok t in
let v1 = vof_for_each v1 and v2 = vof_expr v2 in
OCaml.VSum ("FECond", [ v1; t; v2 ])
| FEllipsis t ->
let t = vof_tok t in
OCaml.VSum ("FEllipsis", [ t ])

and vof_for_var_or_expr = function
| ForInitVar (v1, v2) ->
let v1 = vof_entity v1 and v2 = vof_variable_definition v2 in
Expand Down
12 changes: 10 additions & 2 deletions semgrep-core/src/core/ast/Pretty_print_AST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,20 @@ and for_stmt env level (for_tok, hdr, s) =
in
let opt_expr = opt (fun x -> expr env x) in
let hdr_str =
let for_each (pat, tok, e) =
F.sprintf "%s %s %s" (pattern env pat) (token "in" tok) (expr env e)
in
let multi_for_each = function
| FE fe -> for_each fe
| FECond (fe, _, e) -> F.sprintf "%s if %s" (for_each fe) (expr env e)
| FEllipsis _ -> "..."
in
match hdr with
| ForClassic (init, cond, next) ->
F.sprintf "%s; %s; %s" (show_init_list init) (opt_expr cond)
(opt_expr next)
| ForEach (pat, tok, e) ->
F.sprintf "%s %s %s" (pattern env pat) (token "in" tok) (expr env e)
| ForEach (pat, tok, e) -> for_each (pat, tok, e)
| MultiForEach fors -> String.concat ";" (Common.map multi_for_each fors)
| ForEllipsis tok -> token "..." tok
| ForIn (init, exprs) ->
F.sprintf "%s %s %s" (show_init_list init) "in"
Expand Down
17 changes: 13 additions & 4 deletions semgrep-core/src/core/ast/Visitor_AST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -846,14 +846,23 @@ let (mk_visitor :
and v2 = v_option v_expr v2
and v3 = v_option v_expr v3 in
()
| ForEach (v1, t, v2) ->
let t = v_tok t in
let v1 = v_pattern v1 and v2 = v_expr v2 in
()
| ForEach v1 -> v_for_each v1
| MultiForEach v1 -> v_list v_multi_for_each v1
| ForEllipsis t -> v_tok t
| ForIn (v1, v2) ->
let v1 = v_list v_for_var_or_expr v1 and v2 = v_list v_expr v2 in
()
and v_for_each (v1, t, v2) =
let t = v_tok t in
let v1 = v_pattern v1 and v2 = v_expr v2 in
()
and v_multi_for_each = function
| FE v1 -> v_for_each v1
| FECond (v1, t, v2) ->
v_for_each v1;
v_tok t;
v_expr v2
| FEllipsis t -> v_tok t
and v_for_var_or_expr = function
| ForInitVar (v1, v2) ->
let v1 = v_entity v1 and v2 = v_variable_definition v2 in
Expand Down
5 changes: 5 additions & 0 deletions semgrep-core/src/experiments/api/AST_generic_to_v0.ml
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,11 @@ and map_for_header = function
let t = map_tok t in
let v1 = map_pattern v1 and v2 = map_expr v2 in
`ForEach (v1, t, v2)
| MultiForEach (FE (v1, t, v2) :: _) ->
let t = map_tok t in
let v1 = map_pattern v1 and v2 = map_expr v2 in
`ForEach (v1, t, v2)
| MultiForEach _ -> failwith "todo - MultiForEach"
| ForEllipsis t ->
let t = map_tok t in
`ForEllipsis t
Expand Down
24 changes: 21 additions & 3 deletions semgrep-core/src/matching/Generic_vs_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2250,9 +2250,13 @@ and m_for_header a b =
| G.ForClassic (a1, a2, a3), B.ForClassic (b1, b2, b3) ->
(m_list m_for_var_or_expr) a1 b1 >>= fun () ->
m_option m_expr a2 b2 >>= fun () -> m_option m_expr a3 b3
| G.ForEach (a1, at, a2), B.ForEach (b1, bt, b2) ->
m_pattern a1 b1 >>= fun () ->
m_tok at bt >>= fun () -> m_expr a2 b2
| G.ForEach a1, B.ForEach b1 -> m_for_each a1 b1
| G.MultiForEach a1, B.MultiForEach b1 ->
m_list_with_dots ~less_is_ok:false m_multi_for_each
(function
| G.FEllipsis _ -> true
| _ -> false)
a1 b1
| G.ForIn (a1, a2), B.ForIn (b1, b2) ->
(m_list m_for_var_or_expr) a1 b1 >>= fun () ->
m_list_with_dots m_expr
Expand All @@ -2262,9 +2266,23 @@ and m_for_header a b =
~less_is_ok:false a2 b2
| G.ForClassic _, _
| G.ForEach _, _
| G.MultiForEach _, _
| G.ForIn _, _ ->
fail ()

and m_for_each (a1, at, a2) (b1, bt, b2) =
m_pattern a1 b1 >>= fun () ->
m_tok at bt >>= fun () -> m_expr a2 b2

and m_multi_for_each a b =
match (a, b) with
| G.FE a1, B.FE b1 -> m_for_each a1 b1
| G.FECond (a1, at, a2), B.FECond (b1, bt, b2) ->
m_for_each a1 b1 >>= fun () ->
m_tok at bt >>= fun () -> m_expr a2 b2
| G.FEllipsis _, _ -> return ()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this can can happen given the definition of m_list_with_dots, but I guess it does not hurt.

| _ -> fail ()

and m_block a b =
match (a.s, b.s) with
| G.Block _, B.Block _ -> m_stmt a b
Expand Down
7 changes: 7 additions & 0 deletions semgrep-core/src/matching/SubAST_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ let subexprs_of_stmt_kind = function
| Some cond -> [ H.cond_to_expr cond ])
| Return (_, eopt, _) -> Option.to_list eopt
(* n *)
| For (_, MultiForEach es, _) ->
es
|> List.filter_map (function
| FE (_, _, e) -> Some [ e ]
| FECond ((_, _, e1), _, e2) -> Some [ e1; e2 ]
| FEllipsis _ -> None)
|> List.concat
| For (_, ForClassic (xs, eopt1, eopt2), _) ->
(xs
|> Common.map_filter (function
Expand Down
33 changes: 13 additions & 20 deletions semgrep-core/src/parsing/pfff/scala_to_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -562,24 +562,7 @@ and v_stmt = function
let v1 = v_tok v1
and v2 = v2 |> G.unbracket |> v_enumerators
and v3 = v_for_body v3 in
List.fold_right
(fun gen stmt ->
match gen with
| `G (pat, tok, e, guards) ->
G.For
( v1,
G.ForEach (pat, tok, e),
List.fold_right
(fun (g_tok, g_e) stmt ->
G.If (g_tok, G.Cond g_e, stmt, None) |> G.s)
guards stmt )
|> G.s
| `GIf guards ->
List.fold_right
(fun (g_tok, g_e) stmt ->
G.If (g_tok, G.Cond g_e, stmt, None) |> G.s)
guards stmt)
v2 v3
G.For (v1, G.MultiForEach v2, v3) |> G.s
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I usually try to avoid adding a construct in AST_generic when it's useful only for one language, but
maybe it's ok for this one indeed.

| Return (v1, v2) ->
let v1 = v_tok v1 and v2 = v_option v_expr v2 in
G.Return (v1, v2, G.sc) |> G.s
Expand All @@ -601,8 +584,18 @@ and v_stmt = function
and v_enumerators v = v_list v_enumerator v

and v_enumerator = function
| G v1 -> `G (v_generator v1)
| GIf v1 -> `GIf (v_list v_guard v1)
| G v1 -> (
let pat, tok, e, guards = v_generator v1 in
match guards with
| [] -> G.FE (pat, tok, e)
| (tok2, cond) :: guards ->
let conds =
List.fold_left
(fun e (tok, c) -> G.special (G.Op G.And, tok) [ e; c ])
cond guards
in
G.FECond ((pat, tok, e), tok2, conds))
| GEllipsis tok -> G.FEllipsis tok

and v_generator
{
Expand Down
2 changes: 1 addition & 1 deletion semgrep-core/src/pfff
Submodule pfff updated from f80b02 to 71bbc1
8 changes: 8 additions & 0 deletions semgrep-core/tests/scala/for_loop_ellipsis.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//ERROR: match
val x = for (w <- u if foo) yield w

//ERROR: match
val y = for (a <- b; w <- u if foo; c <- d) yield w

//OK:
val z = for (a <- b; w <- u if bar) yield w
1 change: 1 addition & 0 deletions semgrep-core/tests/scala/for_loop_ellipsis.sgrep
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
for (...; $X <- $Y if foo; ...) yield ...