Skip to content
This repository has been archived by the owner on Dec 19, 2023. It is now read-only.

Commit

Permalink
[C++] adapt parser interface to Parse_info.parsing_result
Browse files Browse the repository at this point in the history
This will help semgrep/semgrep#1952

test plan:
make
  • Loading branch information
aryx committed Aug 3, 2021
1 parent b121903 commit ebffd58
Show file tree
Hide file tree
Showing 9 changed files with 52 additions and 62 deletions.
9 changes: 5 additions & 4 deletions lang_c/parsing/parse_c.ml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
*)
module PI = Parse_info

let logger = Logging.get_logger [__MODULE__]

Expand All @@ -26,17 +27,17 @@ let logger = Logging.get_logger [__MODULE__]
(*****************************************************************************)

let parse file =
let (ast2, stat) = Parse_cpp.parse_with_lang ~lang:Flag_parsing_cpp.C file in
let ast = ast2 |> List.map fst in
let toks = ast2 |> List.concat_map snd in
let {PI. ast; tokens; stat} =
Parse_cpp.parse_with_lang ~lang:Flag_parsing_cpp.C file in
(* less: merge stat? *)
let ast, stat =
try (Ast_c_build.program ast), stat
with exn ->
logger#error "PB: Ast_c_build, on %s (exn = %s)" file (Common.exn_to_s exn);
(*None, { stat with Stat.bad = stat.Stat.bad + stat.Stat.correct } *)
raise exn
in
{ Parse_info. ast; tokens = toks; stat }
{ Parse_info. ast; tokens; stat }

let parse_program file =
let res = parse file in
Expand Down
9 changes: 4 additions & 5 deletions lang_cpp/analyze/database_light_cpp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,14 @@ let compute_database ?(verbose=false) files_or_dirs =
files |> Console.progress ~show:verbose (fun k ->
List.iter (fun file ->
k ();
let (ast2, _stat) = Parse_cpp.parse file in
let res = Parse_cpp.parse file in

let hcomplete_name_of_info =
(*Class_js.extract_complete_name_of_info ast *)
Hashtbl.create 101
in

ast2 |> List.iter (fun (ast, toks) ->
res |> (fun {PI. ast; tokens = toks; _} ->
let prefs = Highlight_code.default_highlighter_preferences in

Highlight_cpp.visit_toplevel ~tag_hook:(fun info categ ->
Expand Down Expand Up @@ -128,14 +128,13 @@ let compute_database ?(verbose=false) files_or_dirs =
files |> Console.progress ~show:verbose (fun k ->
List.iter (fun file ->
k();
let (ast2, _stat) = Parse_cpp.parse file in
let { PI. ast; tokens = toks; stat = _ } = Parse_cpp.parse file in

let ast = Parse_cpp.program_of_program2 ast2 in
(* works by side effect on ast too *)
Check_variables_cpp.check_and_annotate_program
ast;

ast2 |> List.iter (fun (ast, toks) ->
(ast, toks) |> (fun (ast, toks) ->
let prefs = Highlight_code.default_highlighter_preferences in

Highlight_cpp.visit_toplevel ~tag_hook:(fun info categ ->
Expand Down
4 changes: 2 additions & 2 deletions lang_cpp/analyze/highlight_cpp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ let fake_no_use2 = (NoInfoPlace, UniqueDef, MultiUse)
(* Code highlighter *)
(*****************************************************************************)

let visit_toplevel ~tag_hook _prefs (*db_opt *) (toplevel, toks) =
let visit_toplevel ~tag_hook _prefs (*db_opt *) (ast, toks) =
let already_tagged = Hashtbl.create 101 in
let tag = (fun ii categ ->
tag_hook ii categ;
Expand Down Expand Up @@ -444,7 +444,7 @@ let visit_toplevel ~tag_hook _prefs (*db_opt *) (toplevel, toks) =
);
}
in
visitor (Toplevel toplevel);
visitor (Program ast);

(* -------------------------------------------------------------------- *)
(* toks phase 2 *)
Expand Down
2 changes: 1 addition & 1 deletion lang_cpp/analyze/highlight_cpp.mli
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ val visit_toplevel :
(Parse_info.t -> Highlight_code.category -> unit) ->
Highlight_code.highlighter_preferences ->
(*(Database_php.id * Common.filename * Database_php.database) option -> *)
Cst_cpp.toplevel * Parser_cpp.token list ->
Cst_cpp.program * Parser_cpp.token list ->
unit
4 changes: 2 additions & 2 deletions lang_cpp/analyze/test_analyze_cpp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ module PI = Parse_info

let test_highlight_cpp file =

let (ast2, _stat) = Parse_cpp.parse file in
let { PI.ast; tokens = toks; stat = _} = Parse_cpp.parse file in

let h = Hashtbl.create 101 in

ast2 |> List.iter (fun (ast, toks) ->
(ast, toks) |> (fun (ast, toks) ->
(* computing the token attributes *)
let prefs = Highlight_code.default_highlighter_preferences in

Expand Down
55 changes: 24 additions & 31 deletions lang_cpp/parsing/parse_cpp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ module TH = Token_helpers_cpp
module Lexer = Lexer_cpp
module Semantic = Parser_cpp_mly_helper

let logger = Logging.get_logger [__MODULE__]

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
Expand All @@ -38,20 +40,6 @@ module Semantic = Parser_cpp_mly_helper
let use_dypgen = false
*)

(*****************************************************************************)
(* Types *)
(*****************************************************************************)

type toplevels_and_tokens = (Ast.toplevel * Parser_cpp.token list) list

let program_of_program2 xs =
xs |> List.map fst

(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)
let pr2, _pr2_once = Common2.mk_pr2_wrappers Flag.verbose_parsing

(*****************************************************************************)
(* Error diagnostic *)
(*****************************************************************************)
Expand Down Expand Up @@ -229,7 +217,7 @@ let add_defs file =
if not (Sys.file_exists file)
then failwith (spf "Could not find %s, have you set PFFF_HOME correctly?"
file);
pr2 (spf "Using %s macro file" file);
logger#info "Using %s macro file" file;
let xs = extract_macros file in
xs |> List.iter (fun (k, v) -> Hashtbl.add _defs k v)

Expand Down Expand Up @@ -260,13 +248,14 @@ let init_defs file =
*)
let rec lexer_function tr = fun lexbuf ->
match tr.PI.rest with
| [] -> (pr2 "LEXER: ALREADY AT END"; tr.PI.current)
| [] -> logger#error "LEXER: ALREADY AT END"; tr.PI.current
| v::xs ->
tr.PI.rest <- xs;
tr.PI.current <- v;
tr.PI.passed <- v::tr.PI.passed;

if !Flag.debug_lexer then pr2_gen v;
if !Flag.debug_lexer
then pr2_gen v;

if TH.is_comment v
then lexer_function (*~pass*) tr lexbuf
Expand All @@ -282,7 +271,7 @@ let passed_a_define tr =
| _ -> false
)
else begin
pr2 "WIERD: length list of error recovery tokens < 2 ";
logger#error "WIERD: length list of error recovery tokens < 2 ";
false
end

Expand All @@ -297,7 +286,7 @@ let passed_a_define tr =
* !!! This function uses refs and is not reentrant !!! So take care.
* It uses the _defs global defined above!!!!
*)
let parse_with_lang ?(lang=Flag_parsing_cpp.Cplusplus) file =
let parse_with_lang ?(lang=Flag_parsing_cpp.Cplusplus) file : (Ast.program, T.token) PI.parsing_result =

let stat = Parse_info.default_stat file in
let filelines = Common2.cat_array file in
Expand All @@ -310,7 +299,7 @@ let parse_with_lang ?(lang=Flag_parsing_cpp.Cplusplus) file =
let toks =
try Parsing_hacks.fix_tokens ~macro_defs:_defs lang toks_orig
with Token_views_cpp.UnclosedSymbol s ->
pr2 s;
logger#error "unclosed symbol %s" s;
if !Flag_cpp.debug_cplusplus
then raise (Token_views_cpp.UnclosedSymbol s)
else toks_orig
Expand Down Expand Up @@ -354,7 +343,7 @@ let parse_with_lang ?(lang=Flag_parsing_cpp.Cplusplus) file =
Parser_cpp2.toplevel (lexer_function tr) lexbuf_fake
|> List.hd |> fst
with Failure "hd" ->
pr2 "no elements";
logger#error "no elements";
raise Parsing.Parse_error
)
*)
Expand Down Expand Up @@ -467,10 +456,12 @@ let parse_with_lang ?(lang=Flag_parsing_cpp.Cplusplus) file =
| Some xs -> (xs, info):: loop () (* recurse *)
)
in
let v = loop() in
(v, stat)
let xs = loop() in
let ast = xs |> List.map fst in
let tokens = xs |> List.map snd |> List.flatten in
{PI. ast; tokens; stat }

let parse2 file =
let parse2 file : (Ast.program, T.token) PI.parsing_result =
match File_type.file_type_of_file file with
| FT.PL (FT.C _) ->
(try
Expand All @@ -483,19 +474,21 @@ let parse2 file =
| _ -> failwith (spf "not a C/C++ file: %s" file)


let parse file =
let parse file : (Ast.program, T.token) PI.parsing_result =
Common.profile_code "Parse_cpp.parse" (fun () ->
try
parse2 file
with Stack_overflow ->
pr2 (spf "PB stack overflow in %s" file);
[(Ast.NotParsedCorrectly [], ([]))],
{ (Stat.bad_stat file) with Stat.have_timeout = true }
logger#error "PB stack overflow in %s" file;
{ PI.ast = [Ast.NotParsedCorrectly []];
tokens = [];
stat = { (Stat.bad_stat file) with Stat.have_timeout = true }
}
)

let parse_program file =
let (ast2, _stat) = parse file in
program_of_program2 ast2
let res = parse file in
res.PI.ast

(*****************************************************************************)
(* Sub parsers *)
Expand All @@ -510,7 +503,7 @@ let any_of_string lang s =
let toks =
try Parsing_hacks.fix_tokens ~macro_defs:_defs lang toks_orig
with Token_views_cpp.UnclosedSymbol s ->
pr2 s;
logger#error "unclosed symbol %s" s;
if !Flag_cpp.debug_cplusplus
then raise (Token_views_cpp.UnclosedSymbol s)
else toks_orig
Expand Down
12 changes: 4 additions & 8 deletions lang_cpp/parsing/parse_cpp.mli
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@

(* the token list contains also the comment-tokens *)
type toplevels_and_tokens = (Cst_cpp.toplevel * Parser_cpp.token list) list

(* This is the main function. It uses _defs below which often comes
* from a standard.h macro file. It will raise Parse_error unless
* Flag_parsing_cpp.error_recovery is set.
*)
val parse:
Common.filename -> (toplevels_and_tokens * Parse_info.parsing_stat)
Common.filename ->
(Cst_cpp.program, Parser_cpp.token) Parse_info.parsing_result

val parse_program:
Common.filename -> Cst_cpp.program
val parse_with_lang:
?lang:Flag_parsing_cpp.language ->
Common.filename -> (toplevels_and_tokens * Parse_info.parsing_stat)
Common.filename ->
(Cst_cpp.program, Parser_cpp.token) Parse_info.parsing_result


(* other parsers *)
Expand Down Expand Up @@ -45,6 +44,3 @@ val extract_macros:

(* subsystem testing *)
val tokens: Common.filename -> Parser_cpp.token list

(* a few helpers *)
val program_of_program2: toplevels_and_tokens -> Cst_cpp.program
13 changes: 8 additions & 5 deletions lang_cpp/parsing/test_parsing_cpp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,20 @@ let test_parse_cpp ?lang xs =

fullxs |> Console.progress (fun k -> List.iter (fun file ->
k();
let (_xs, stat) =
let stat =
try (
Common.save_excursion Flag.error_recovery true (fun () ->
Common.save_excursion Flag.exn_when_lexical_error false (fun () ->
match lang with
| None -> Parse_cpp.parse file
| Some lang -> Parse_cpp.parse_with_lang ~lang file
let res =
match lang with
| None -> Parse_cpp.parse file
| Some lang -> Parse_cpp.parse_with_lang ~lang file
in
res.PI.stat
))
) with exn -> (* TODO: be more strict, List.hd failure, Stack overflow *)
pr2 (spf "PB on %s, exn = %s" file (Common.exn_to_s exn));
[], PI.bad_stat file
PI.bad_stat file
in
Common.push stat stat_list;

Expand Down
6 changes: 2 additions & 4 deletions lang_ml/parsing/parse_ml.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,11 @@ let error_msg_tok tok =
(*****************************************************************************)
(* Lexing only *)
(*****************************************************************************)
let tokens2 file =
let tokens file =
let token = Lexer_ml.token in
Parse_info.tokenize_all_and_adjust_pos
file token TH.visitor_info_of_tok TH.is_eof

let tokens a =
Common.profile_code "Parse_ml.tokens" (fun () -> tokens2 a)
[@@profiling]

(*****************************************************************************)
(* Main entry point *)
Expand Down

0 comments on commit ebffd58

Please sign in to comment.