From 336feb173de0d5a622af82e0fcfd25ffe357c811 Mon Sep 17 00:00:00 2001 From: Iago Abal Date: Mon, 14 Aug 2023 10:40:25 +0200 Subject: [PATCH] matching: ellipsis: Always try going deep Follows: c1ca429c183 ("Optimize deep statement matching (#852)") test plan: make test # new tests --- changelog.d/gh-660.fixed | 24 ++++++++++++++++++++++ src/matching/Generic_vs_generic.ml | 16 +++++++-------- tests/rules/ellipsis_stmts_deep.py | 13 ++++++++++++ tests/rules/ellipsis_stmts_deep.yaml | 29 +++++++++++++++++++++++++++ tests/rules/ellipsis_stmts_deep1.ts | 13 ++++++++++++ tests/rules/ellipsis_stmts_deep1.yaml | 19 ++++++++++++++++++ tests/semgrep-rules | 2 +- 7 files changed, 107 insertions(+), 9 deletions(-) create mode 100644 changelog.d/gh-660.fixed create mode 100644 tests/rules/ellipsis_stmts_deep.py create mode 100644 tests/rules/ellipsis_stmts_deep.yaml create mode 100644 tests/rules/ellipsis_stmts_deep1.ts create mode 100644 tests/rules/ellipsis_stmts_deep1.yaml diff --git a/changelog.d/gh-660.fixed b/changelog.d/gh-660.fixed new file mode 100644 index 000000000000..eda3cdd739a1 --- /dev/null +++ b/changelog.d/gh-660.fixed @@ -0,0 +1,24 @@ +Fixed a regression introduced three years ago in 0.9.0, when optimizing +the evaluation of `...` (ellipsis) to be faster. We made `...` only match +"deeply" (inside an `if` for example) if nothing else matched, thus +causing that this pattern: + +```python +foo() +... +bar($A) +``` + +would only produce a match rather than two: +```python +foo() +if cond: + bar(x) +bar(y) +``` + +Semgrep matched from `foo()` to `bar(y)`, but it did not match from +`foo()` to `bar(x)`. However, if commented out `bar(y)`, then Semgrep +matched `bar(x)`. + +Semgrep now produces the two expected matches. diff --git a/src/matching/Generic_vs_generic.ml b/src/matching/Generic_vs_generic.ml index 9c5c468135ea..e3f575d8b322 100644 --- a/src/matching/Generic_vs_generic.ml +++ b/src/matching/Generic_vs_generic.ml @@ -2238,14 +2238,14 @@ and m_stmts_deep ~inside ~less_is_ok (xsa : G.stmt list) (xsb : G.stmt list) = | ( ({ s = G.ExprStmt ({ e = G.Ellipsis _i; _ }, _); _ } :: _ as xsa), (_ :: _ as xsb) ) -> (* let's first try without going deep *) - m_list__m_stmt xsa xsb >!> fun () -> - if_config - (fun x -> x.go_deeper_stmt) - ~then_: - (match SubAST_generic.flatten_substmts_of_stmts xsb with - | None -> fail () (* was already flat *) - | Some (xsb, _UNUSED_last_stmt) -> m_list__m_stmt xsa xsb) - ~else_:(fail ()) + m_list__m_stmt xsa xsb + >||> if_config + (fun x -> x.go_deeper_stmt) + ~then_: + (match SubAST_generic.flatten_substmts_of_stmts xsb with + | None -> fail () (* was already flat *) + | Some (xsb, _UNUSED_last_stmt) -> m_list__m_stmt xsa xsb) + ~else_:(fail ()) (* dots: metavars: $...BODY *) | ( ({ s = G.ExprStmt ({ e = G.N (G.Id ((s, _), _idinfo)); _ }, _); _ } :: _ as xsa), diff --git a/tests/rules/ellipsis_stmts_deep.py b/tests/rules/ellipsis_stmts_deep.py new file mode 100644 index 000000000000..b0d04e5093d5 --- /dev/null +++ b/tests/rules/ellipsis_stmts_deep.py @@ -0,0 +1,13 @@ +# https://github.com/returntocorp/semgrep-rules/issues/660 + +def decorator_factory( foo ): + def decorator( function ): + # ok:reproducer-660 + def function_wrapper( *args, **kwargs ): + # Do something with 'foo'. + return function( *args, **kwargs ) + return function_wrapper + return decorator + +@decorator_factory( 'bar' ) +def test( ): ''' Simple reproducer. ''' diff --git a/tests/rules/ellipsis_stmts_deep.yaml b/tests/rules/ellipsis_stmts_deep.yaml new file mode 100644 index 000000000000..b2d6a0b9276f --- /dev/null +++ b/tests/rules/ellipsis_stmts_deep.yaml @@ -0,0 +1,29 @@ +rules: + - id: reproducer-660 + patterns: + - pattern-inside: | + def $F(...): + ... + def $FF(...): + ... + ... + - pattern-not-inside: | + def $F(...): + ... + def $FF(...): + ... + ... + <... $FF ...> + - pattern: | + def $FF(...): + ... + - focus-metavariable: $FF + message: function `$FF` is defined inside a function but never used + languages: + - python + severity: ERROR + metadata: + category: maintainability + technology: + - python + license: Commons Clause License Condition v1.0[LGPL-2.1-only] diff --git a/tests/rules/ellipsis_stmts_deep1.ts b/tests/rules/ellipsis_stmts_deep1.ts new file mode 100644 index 000000000000..60a7d2c75d77 --- /dev/null +++ b/tests/rules/ellipsis_stmts_deep1.ts @@ -0,0 +1,13 @@ + +foo(); + +obj.met(async () => { + something(); + // ruleid: test + x = baz(); +}); + +bar(); + +// ok: test +y = baz(); diff --git a/tests/rules/ellipsis_stmts_deep1.yaml b/tests/rules/ellipsis_stmts_deep1.yaml new file mode 100644 index 000000000000..0027d73a33db --- /dev/null +++ b/tests/rules/ellipsis_stmts_deep1.yaml @@ -0,0 +1,19 @@ +rules: + - id: test + message: > + Test + languages: + - typescript + severity: WARNING + patterns: + - pattern: baz() + - pattern-inside: | + foo(); + ... + $X = baz(); + - pattern-not-inside: | + foo(); + ... + bar(); + ... + $X = baz(); diff --git a/tests/semgrep-rules b/tests/semgrep-rules index 5ce59a973a33..70bd594c73b5 160000 --- a/tests/semgrep-rules +++ b/tests/semgrep-rules @@ -1 +1 @@ -Subproject commit 5ce59a973a3388827755336c2dad1eeb73d50e2c +Subproject commit 70bd594c73b586152d446aac00d4a37629fb3248