Skip to content
This repository has been archived by the owner on Dec 19, 2023. It is now read-only.

Commit

Permalink
* lang_cpp/parsing/parse_cpp.ml: Use Parse_info.tokenize_all_and_adju…
Browse files Browse the repository at this point in the history
…st_pos

This will help semgrep/semgrep#1925
The helper tokenize_all_and_adjust_pos correctly intercepts Lexical_error
and adjusts the file position of the token inside the Lexical_error.
When I introduced this helper function, I forgot to use it for the
C/C++ parser (not sure why, maybe because the code was also handling
ExpandedTok).

test plan:
$ semgrep -l c -e 'FOO' /tmp/foo.c
ran 1 rules on 1 files: 0 findings
1 files could not be analyzed; run with --verbose for details or run with --strict to exit non-zero if any file cannot be analyzed

This no longer generates a Python backtrace.

Same with
$ /home/pad/semgrep/_build/default/cli/Main.exe -dump_ast /tmp/foo.c
/tmp/foo.c:3:0: Lexical error: unrecognised symbol, in token rule:#
Raised at file "parsing/Parse_code.ml", line 144, characters 24-27
Called from file "parsing/Parse_code.ml", line 236, characters 18-48
Called from file "cli/Main.ml", line 855, characters 6-72
Called from file "pfff/h_program-lang/Error_code.ml", line 388, characters 4-8

There is no more "NO FILE INFO YET" exception.
  • Loading branch information
aryx committed Nov 9, 2020
1 parent 5b7211c commit f0b5b5b
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 36 deletions.
9 changes: 7 additions & 2 deletions h_program-lang/Parse_info.ml
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,13 @@ let tokenize_all_and_adjust_pos ?(unicode_hack=false)
{ ii with token =
(* could assert pinfo.filename = file ? *)
match ii.token with
| OriginTok pi -> OriginTok(complete_token_location_large file table pi)
| _ -> raise Todo
| OriginTok pi ->
OriginTok(complete_token_location_large file table pi)
| ExpandedTok (pi,vpi, off) ->
ExpandedTok(complete_token_location_large file table pi,vpi, off)
| FakeTokStr (s,vpi_opt) ->
FakeTokStr (s,vpi_opt)
| Ab -> raise Impossible
}
in
let rec tokens_aux acc =
Expand Down
38 changes: 4 additions & 34 deletions lang_cpp/parsing/parse_cpp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -119,40 +119,10 @@ let is_same_line_or_close line tok =
(*****************************************************************************)

(* called by parse below *)
(* [tokens2 file] lexes [file] with [Lexer.token] and returns the tokens in
 * source order; the last element is the first token for which [TH.is_eof]
 * holds.
 *
 * Every token goes through [TH.visitor_info_of_tok] so that the location
 * carried by [PI.OriginTok] and [PI.ExpandedTok] is completed with
 * [Parse_info.complete_token_location_large]. [PI.FakeTokStr] is left
 * untouched, and [PI.Ab] raises [Impossible].
 *
 * The collecting loop uses an accumulator and is tail-recursive, so the
 * stack depth no longer grows with the number of tokens in the file.
 *)
let tokens2 file =
  let table = Parse_info.full_charpos_to_pos_large file in

  (* fill in the line and col information *)
  let adjust_info ii =
    { ii with PI.token =
        (* could assert pinfo.filename = file ? *)
        match ii.PI.token with
        | PI.OriginTok pi ->
            PI.OriginTok
              (Parse_info.complete_token_location_large file table pi)
        | PI.ExpandedTok (pi, vpi, off) ->
            PI.ExpandedTok
              (Parse_info.complete_token_location_large file table pi,
               vpi, off)
        | PI.FakeTokStr (s, vpi_opt) -> PI.FakeTokStr (s, vpi_opt)
        | PI.Ab -> raise Impossible
    }
  in

  Common.with_open_infile file (fun chan ->
    let lexbuf = Lexing.from_channel chan in

    (* [acc] holds the tokens read so far, most recent first *)
    let rec tokens_aux acc =
      let tok = Lexer.token lexbuf |> TH.visitor_info_of_tok adjust_info in
      if TH.is_eof tok
      then List.rev (tok :: acc)
      else tokens_aux (tok :: acc)
    in
    tokens_aux []
  )

(* Entry point for tokenizing [file]: runs [tokens2] through
 * [Common.profile_code] under the key "Parse_cpp.tokens". *)
let tokens file =
  let run () = tokens2 file in
  Common.profile_code "Parse_cpp.tokens" run
(* [tokens file] tokenizes [file] by delegating to
 * [Parse_info.tokenize_all_and_adjust_pos], handing it the C/C++ lexer
 * entry point, the per-token info visitor and the end-of-file predicate. *)
let tokens file =
  let next_token = Lexer.token in
  let map_info = TH.visitor_info_of_tok in
  let at_eof = TH.is_eof in
  Parse_info.tokenize_all_and_adjust_pos ~unicode_hack:false
    file next_token map_info at_eof
[@@profiling]

(*****************************************************************************)
(* Fuzzy parsing *)
Expand Down

0 comments on commit f0b5b5b

Please sign in to comment.