Skip to content

Commit

Permalink
fix(kotlin): allow newlines in class declarations (#10015)
Browse files Browse the repository at this point in the history
Fixes: SAF-899
Fixes: SAF-923

Requires: semgrep/ocaml-tree-sitter-semgrep#477

The current version of Kotlin's tree-sitter doesn't support newlines
when declaring classes. It has to do with automatic semicolons. For
example, the following won't parse:

```
class C
constructor(arg:Int){}
```

but the following parses fine:

```
class C constructor(arg:Int){}
```

The fix requires a hack in the grammar. (Ideally the scanner seems to be
the right place for a fix, but modifying the scanner is complicated.)

The hack works as follows. When the parser sees the code:

```
class C
constructor(arg:Int){}
```

The first line is parsed as a class declaration node, and the second
line is parsed as a partial class declaration node. These need to be
consolidated during CST -> Generic AST. This PR implements this merge.

Test plan:
* `make core-test` with new tests
* ran `semgrep-core -lang kotlin -dump_ast` with some examples that have
new lines in class declarations, including the one reported on Linear.
The output AST shows that there's a single class definition that is
correctly populated.
  • Loading branch information
amchiclet committed Mar 27, 2024
1 parent 670e028 commit 9fffde4
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 2 deletions.
12 changes: 12 additions & 0 deletions changelog.d/saf-899.fixed
@@ -0,0 +1,12 @@
Fixed a parsing error in Kotlin when there's a newline between the class name and the primary constructor.

This could not parse before

```
class C
constructor(arg:Int){}
```

because of the newline between the class name and the constructor.

Now it's fixed.
107 changes: 106 additions & 1 deletion languages/kotlin/generic/Parse_kotlin_tree_sitter.ml
Expand Up @@ -56,6 +56,22 @@ let vars_to_pattern (l, xs, r) =
let ys = xs |> List_.map (fun (id, ptype) -> var_to_pattern (id, ptype)) in
PatTuple (l, ys, r)

(*****************************************************************************)
(* Constants *)
(*****************************************************************************)

(* The fake name @@@PARTIAL_CLASS_DECLARATION is used to distinguish
* 1) two valid class declarations in a row vs
* 2) a class declaration followed by a partial class declaration
*
* Valid kotlin programs cannot have class names that have @@@, so we will
* know that this did not come from the source code.
*
* We will use this fake name to detect partial class
* declarations in the function merge_class_declarations below.
*)
let fake_name_for_partial_class_decl = "@@@PARTIAL_CLASS_DECLARATION"

(*****************************************************************************)
(* Boilerplate converter *)
(*****************************************************************************)
Expand Down Expand Up @@ -1859,6 +1875,40 @@ and statement (env : env) (x : CST.statement) : stmt =
G.exprstmt v1
in
v2
| `Part_class_decl (v1, v2, v3, v4, v5, v6, v7) ->
let tparams =
match v1 with
| Some x -> Some (type_parameters env x)
| None -> None
in
let cparams = primary_constructor env (Some (v2, v3), v4) in
let cextends =
match v5 with
| Some (v1, v2) ->
let _v1 = token env v1 (* ":" *) in
let v2 = delegation_specifiers env v2 in
v2
| None -> []
in
let _type_constraints_TODO =
match v6 with
| Some x -> type_constraints env x
| None -> []
in
let cbody =
match v7 with
| Some x -> class_body env x
| None -> fb []
in
let ckind = (Class, fake "class") in
(* See Constants section above why we use fake_name_for_partial_class_decl. *)
let fake_ident = (fake_name_for_partial_class_decl, fake "fake_tok") in
let ent = basic_entity fake_ident ~tparams in
let cdef =
{ ckind; cextends; cimplements = []; cmixins = []; cparams; cbody }
in
let def = (ent, ClassDef cdef) in
DefStmt def |> G.s

and statements (env : env) ((v1, v2, v3) : CST.statements) =
let v1 = statement env v1 in
Expand Down Expand Up @@ -2258,6 +2308,60 @@ let file_annotation (env : env) ((v1, v2, v3, v4, v5) : CST.file_annotation) =
let _semi = semi env v5 in
()

let merge_class_declarations (xs : stmt list) : stmt list =
let rec merge rev_acc (xs : stmt list) =
match xs with
(* Merge two consecutive class declarations, if the second one
* is a partial class declaration.
*
* For the first class declaration, only the name, kind of class,
* attributes, and type parameters will/may be populated because
* it will come from the code
* @SomeAttribute class SomeClassName < TypeParam >
* or
* @SomeAttribute enum SomeClassName < TypeParam >
*
* The rest is populated by the second part which is a partial
* class declaration.
*
* We expect that each class declaration will have at most one
* corresponding partial class declaration that follows immediately.
*
* If we missed something and somehow this is not the case, we will
* let the partial class declaration remain in the Generic AST.
*)
| { s = DefStmt ({ name; attrs; tparams }, ClassDef { ckind; _ }); _ }
:: {
s =
DefStmt
( {
name = EN (Id ((class_name, _), _));
attrs = [];
tparams = new_tparams;
},
ClassDef
{ ckind = _; cextends; cimplements; cmixins; cparams; cbody }
);
_;
}
:: rest
when class_name = fake_name_for_partial_class_decl ->
let tparams =
match tparams with
| Some _ -> tparams
| None -> new_tparams
in
let ent = { name; attrs; tparams } in
let cdef =
ClassDef { ckind; cextends; cimplements; cmixins; cparams; cbody }
in
let stmt = DefStmt (ent, cdef) |> G.s in
merge (stmt :: rev_acc) rest
| x :: rest -> merge (x :: rev_acc) rest
| [] -> List.rev rev_acc
in
merge [] xs

let source_file (env : env) (x : CST.source_file) : any =
match x with
| `Opt_sheb_line_rep_file_anno_opt_pack_header_rep_import_list_rep_stmt_semi
Expand All @@ -2282,8 +2386,9 @@ let source_file (env : env) (x : CST.source_file) : any =
v1)
v5
in
let xs = merge_class_declarations v5 in
let dirs = v3 @ v4 |> List_.map (fun d -> DirectiveStmt d |> G.s) in
Pr (dirs @ v5)
Pr (dirs @ xs)
| `Semg_exp (_v1, v2) ->
let v2 = expression env v2 in
E v2
Expand Down
2 changes: 1 addition & 1 deletion languages/kotlin/tree-sitter/semgrep-kotlin
3 changes: 3 additions & 0 deletions tests/parsing/kotlin/class_newline.kt
@@ -0,0 +1,3 @@
class C
constructor(arg: Int)
{}
6 changes: 6 additions & 0 deletions tests/patterns/kotlin/bigger_class_with_newline.kt
@@ -0,0 +1,6 @@
// ERROR:
@Annotation1 class BiggerClass< X, Int >

@Annotation2 constructor(arg: Int): BaseClass where X: Int {
@Annotation3 constructor(arg1: Int, arg2: Int) {}
}
4 changes: 4 additions & 0 deletions tests/patterns/kotlin/bigger_class_with_newline.sgrep
@@ -0,0 +1,4 @@
class $X <$T1, $T2>
constructor (arg:Int): $BASE_CLASS {
...
}
7 changes: 7 additions & 0 deletions tests/patterns/kotlin/class_newline.kt
@@ -0,0 +1,7 @@
// ERROR:
class MyClass constructor(arg: Int) {}

// ERROR:
class MyClass
constructor(arg: Int)
{}
9 changes: 9 additions & 0 deletions tests/patterns/kotlin/class_newline.sgrep
@@ -0,0 +1,9 @@
class $X

constructor

(arg: Int)

{

}

0 comments on commit 9fffde4

Please sign in to comment.