Skip to content

Commit

Permalink
Introduce Type_generic.ml
Browse files Browse the repository at this point in the history
This refactors m_compatible_type to go in 2 steps:
one step to extract the type, and another for the match

This will help #5350

test plan:
make test
  • Loading branch information
aryx committed May 30, 2022
1 parent 2ece212 commit c93a25b
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 65 deletions.
68 changes: 68 additions & 0 deletions semgrep-core/src/core/ast/Type_generic.ml
@@ -0,0 +1,68 @@
(* Yoann Padioleau
*
* Copyright (C) 2022 r2c
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)
module G = AST_generic

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(* Common representation for (builtin) types across languages.
*
* In certain languages, a boolean declaration is using the type name
* "boolean", in another language "Bool", in yet another "bool".
* This is tedious to handle all those specifities during matching, so
* the goal of this file is to factorize those specifities here.
*
* alt:
* - store this directly in AST_generic.ml and do the work in the
* xxx_to_generic.ml
*
* related:
* - Type.ml in deep-semgrep
*)

(*****************************************************************************)
(* Types *)
(*****************************************************************************)

(*
* coupling: this is mostly used for typed metavariables like '($X: int)' to
* match literals like 'f(1)', so most of the types below should have
* a corresponding construct in AST_generic.literal
*)
type builtin_type = TInt | TNumber | TString | TFloat [@@deriving show]

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* less: should sanity check things by looking at [lang] *)
let builtin_type_of_ident _langTODO str =
match str with
| "int" -> Some TInt
| "float" -> Some TFloat
| "str"
| "string"
| "String" ->
Some TString
| _ -> None

let builtin_type_of_type lang t =
match t.G.t with
(* for Python literal checking *)
| G.TyExpr { e = G.N (G.Id ((str, _t), _idinfo)); _ } ->
builtin_type_of_ident lang str
(* for Java/Go/... literals *)
| G.TyN (Id ((str, _t), _idinfo)) -> builtin_type_of_ident lang str
| _ -> None
Expand Up @@ -217,7 +217,7 @@ let tests =
in
let matches_with_env =
let env =
Matching_generic.empty_environment None None
Matching_generic.empty_environment None lang
Config_semgrep.default_config
in
Match_patterns.match_any_any pattern code env
Expand Down
7 changes: 3 additions & 4 deletions semgrep-core/src/matching/Apply_equivalences.ml
Expand Up @@ -26,7 +26,7 @@ module Env = Metavariable_capture
(* Matchers for code equivalence mode *)
(*****************************************************************************)

let match_e_e_for_equivalences _ruleid a b =
let match_e_e_for_equivalences _ruleid lang a b =
Common.save_excursion Flag.equivalence_mode true (fun () ->
let config =
{
Expand All @@ -36,7 +36,6 @@ let match_e_e_for_equivalences _ruleid a b =
}
in
let cache = None in
let lang = None in
let env = Matching_generic.empty_environment cache lang config in
Generic_vs_generic.m_expr a b env)

Expand Down Expand Up @@ -71,7 +70,7 @@ let subst_e (env : Env.t) e =
in
visitor.M.vexpr e

let apply equivs any =
let apply equivs lang any =
let expr_rules = ref [] in
let stmt_rules = ref [] in

Expand Down Expand Up @@ -106,7 +105,7 @@ let apply equivs any =
| (l, r) :: xs -> (
(* look for a match on original x, not x' *)
let matches_with_env =
match_e_e_for_equivalences "<equivalence>" l x
match_e_e_for_equivalences "<equivalence>" lang l x
in
match matches_with_env with
(* todo: should generate a Disj for each possibilities? *)
Expand Down
2 changes: 1 addition & 1 deletion semgrep-core/src/matching/Apply_equivalences.mli
@@ -1 +1 @@
val apply : Equivalence.t list -> Pattern.t -> Pattern.t
val apply : Equivalence.t list -> Lang.t -> Pattern.t -> Pattern.t
73 changes: 36 additions & 37 deletions semgrep-core/src/matching/Generic_vs_generic.ml
Expand Up @@ -31,6 +31,7 @@ module MV = Metavariable
module Flag = Flag_semgrep
module Config = Config_semgrep_t
module H = AST_generic_helpers
module T = Type_generic

(* optimisations *)
module CK = Caching.Cache_key
Expand Down Expand Up @@ -724,7 +725,7 @@ and m_expr a b =
envf (str, tok) (MV.E b)
(* metavar: typed! *)
| G.TypedMetavar ((str, tok), _, t), _b when MV.is_metavar_name str ->
m_compatible_type (str, tok) t b
with_lang (fun lang -> m_compatible_type lang (str, tok) t b)
(* dots: should be patterned-match before in arguments, or statements,
* but this is useful for keyword parameters, as in f(..., foo=..., ...)
*)
Expand All @@ -743,7 +744,7 @@ and m_expr a b =
(with_lang (fun lang ->
match
Constant_propagation.constant_propagation_and_evaluate_literal
?lang b
~lang b
with
| Some b1 -> m_literal_svalue a1 b1
| None -> fail ()))
Expand Down Expand Up @@ -1185,43 +1186,41 @@ and m_container_ordered_elements a b =
* which would require to transform the code in the generic_vs_generic
* style as typechecking could also bind metavariables in the process
*)
and m_compatible_type typed_mvar t e =
match (t.G.t, e.G.e) with
(* for Python literal checking *)
| G.TyExpr { e = G.N (G.Id (("int", _tok), _idinfo)); _ }, B.L (B.Int _) ->
envf typed_mvar (MV.E e)
| G.TyExpr { e = G.N (G.Id (("float", _tok), _idinfo)); _ }, B.L (B.Float _)
->
envf typed_mvar (MV.E e)
| G.TyExpr { e = G.N (G.Id (("str", _tok), _idinfo)); _ }, B.L (B.String _) ->
envf typed_mvar (MV.E e)
(* for C specific literals *)
| G.TyPointer (_, { t = TyN (G.Id (("char", _), _)); _ }), B.L (B.String _) ->
envf typed_mvar (MV.E e)
| G.TyPointer (_, _), B.L (B.Null _) -> envf typed_mvar (MV.E e)
(* for Java and Go literals *)
| G.TyN (Id (("int", _), _)), B.L (B.Int _) -> envf typed_mvar (MV.E e)
| G.TyN (Id (("float", _), _)), B.L (B.Float _) -> envf typed_mvar (MV.E e)
| G.TyN (Id ((("string" | "String"), _), _)), B.L (B.String _) ->
envf typed_mvar (MV.E e)
(* for C strings to match metavariable pointer types *)
| ( G.TyPointer (t1, { t = G.TyN (G.Id ((_, tok), id_info)); _ }),
B.L (B.String _) ) ->
m_type_ t
(G.TyPointer (t1, G.TyN (G.Id (("char", tok), id_info)) |> G.t) |> G.t)
>>= fun () -> envf typed_mvar (MV.E e)
(* for matching ids *)
(* this is covered by the basic type propagation done in Naming_AST.ml *)
| _ta, B.N (B.Id (idb, ({ B.id_type = tb; _ } as id_infob))) ->
(* NOTE: Name values must be represented with MV.Id! *)
m_type_option_with_hook idb (Some t) !tb >>= fun () ->
envf typed_mvar (MV.Id (idb, Some id_infob))
| _ta, _eb -> (
match type_of_expr e with
| Some (idb, tb) ->
m_type_option_with_hook idb (Some t) tb >>= fun () ->
and m_compatible_type lang typed_mvar t e =
match (Type_generic.builtin_type_of_type lang t, e.G.e) with
| Some builtin, B.L lit -> (
match (builtin, lit) with
| T.TInt, B.Int _
| T.TFloat, B.Float _
| T.TString, B.String _ ->
envf typed_mvar (MV.E e)
| _ -> fail ())
| _ -> (
match (t.G.t, e.G.e) with
(* for C specific literals *)
| ( G.TyPointer (_, { t = TyN (G.Id (("char", _), _)); _ }),
B.L (B.String _) ) ->
envf typed_mvar (MV.E e)
| G.TyPointer (_, _), B.L (B.Null _) -> envf typed_mvar (MV.E e)
(* for C strings to match metavariable pointer types *)
| ( G.TyPointer (t1, { t = G.TyN (G.Id ((_, tok), id_info)); _ }),
B.L (B.String _) ) ->
m_type_ t
(G.TyPointer (t1, G.TyN (G.Id (("char", tok), id_info)) |> G.t)
|> G.t)
>>= fun () -> envf typed_mvar (MV.E e)
(* for matching ids *)
(* this is covered by the basic type propagation done in Naming_AST.ml *)
| _ta, B.N (B.Id (idb, ({ B.id_type = tb; _ } as id_infob))) ->
(* NOTE: Name values must be represented with MV.Id! *)
m_type_option_with_hook idb (Some t) !tb >>= fun () ->
envf typed_mvar (MV.Id (idb, Some id_infob))
| _ta, _eb -> (
match type_of_expr e with
| Some (idb, tb) ->
m_type_option_with_hook idb (Some t) tb >>= fun () ->
envf typed_mvar (MV.E e)
| _ -> fail ()))

(* returns a type option and an ident that can be used to query LSP *)
and type_of_expr e : (G.ident * G.type_ option) option =
Expand Down
14 changes: 5 additions & 9 deletions semgrep-core/src/matching/Match_patterns.ml
Expand Up @@ -142,7 +142,7 @@ let match_rules_and_recurse lang config (file, hook, matches) rules matcher k
any x =
rules
|> List.iter (fun (pattern, rule, cache) ->
let env = MG.empty_environment cache (Some lang) config in
let env = MG.empty_environment cache lang config in
let matches_with_env = matcher rule pattern x env in
if matches_with_env <> [] then
(* Found a match *)
Expand Down Expand Up @@ -233,7 +233,7 @@ let check2 ~hook range_filter (config, equivs) rules (file, lang, ast) =
|> List.iter (fun rule ->
(* less: normalize the pattern? *)
let any = rule.MR.pattern in
let any = Apply_equivalences.apply equivs any in
let any = Apply_equivalences.apply equivs lang any in
let cache =
if !Flag.with_opt_cache then Some (Caching.Cache.create ())
else None
Expand Down Expand Up @@ -278,9 +278,7 @@ let check2 ~hook range_filter (config, equivs) rules (file, lang, ast) =
(show_expr_kind x.e);
()
| Some range_loc when range_filter range_loc ->
let env =
MG.empty_environment cache (Some lang) config
in
let env = MG.empty_environment cache lang config in
let matches_with_env = match_e_e rule pattern x env in
if matches_with_env <> [] then
(* Found a match *)
Expand Down Expand Up @@ -316,7 +314,7 @@ let check2 ~hook range_filter (config, equivs) rules (file, lang, ast) =
let visit_stmt () =
!stmt_rules
|> List.iter (fun (pattern, _pattern_strs, rule, cache) ->
let env = MG.empty_environment cache (Some lang) config in
let env = MG.empty_environment cache lang config in
let matches_with_env = match_st_st rule pattern x env in
if matches_with_env <> [] then
(* Found a match *)
Expand Down Expand Up @@ -377,9 +375,7 @@ let check2 ~hook range_filter (config, equivs) rules (file, lang, ast) =
!stmts_rules
|> List.iter (fun (pattern, _pattern_strs, rule, cache) ->
Common.profile_code "Semgrep_generic.kstmts" (fun () ->
let env =
MG.empty_environment cache (Some lang) config
in
let env = MG.empty_environment cache lang config in
let matches_with_env =
match_sts_sts rule pattern x env
in
Expand Down
12 changes: 3 additions & 9 deletions semgrep-core/src/matching/Matching_generic.ml
Expand Up @@ -79,7 +79,7 @@ type tin = {
stmts_match_span : Stmts_match_span.t;
cache : tout Caching.Cache.t option;
(* TODO: this does not have to be in tout; maybe split tin in 2? *)
lang : Lang.t option;
lang : Lang.t;
config : Config_semgrep.t;
}

Expand Down Expand Up @@ -342,14 +342,8 @@ let (envf : MV.mvar G.wrap -> MV.mvalue -> tin -> tout) =
(lazy (spf "envf: success, %s (%s)" mvar (MV.str_of_mval any)));
return new_binding

let empty_environment opt_cache opt_lang config =
{
mv = Env.empty;
stmts_match_span = Empty;
cache = opt_cache;
lang = opt_lang;
config;
}
let empty_environment opt_cache lang config =
{ mv = Env.empty; stmts_match_span = Empty; cache = opt_cache; lang; config }

(*****************************************************************************)
(* Helpers *)
Expand Down
6 changes: 3 additions & 3 deletions semgrep-core/src/matching/Matching_generic.mli
Expand Up @@ -6,7 +6,7 @@ type tin = {
stmts_match_span : Stmts_match_span.t;
cache : tout Caching.Cache.t option;
(* TODO: this does not have to be in tout; maybe split tin in 2? *)
lang : Lang.t option;
lang : Lang.t;
config : Config_semgrep.t;
}

Expand Down Expand Up @@ -47,7 +47,7 @@ val or_list : 'a matcher -> 'a -> 'a list -> tin -> tout
val ( let* ) : (tin -> tout) -> (unit -> tin -> tout) -> tin -> tout

val empty_environment :
tout Caching.Cache.t option -> Lang.t option -> Config_semgrep.t -> tin
tout Caching.Cache.t option -> Lang.t -> Config_semgrep.t -> tin

val add_mv_capture : Metavariable.mvar -> Metavariable.mvalue -> tin -> tin

Expand All @@ -65,7 +65,7 @@ val if_config :
tin ->
tout

val with_lang : (Lang.t option -> tin -> 'a) -> tin -> 'a
val with_lang : (Lang.t -> tin -> 'a) -> tin -> 'a

val check_and_add_metavar_binding :
Metavariable.mvar * Metavariable.mvalue -> tin -> tin option
Expand Down
2 changes: 1 addition & 1 deletion semgrep-core/src/matching/Unit_matcher.ml
Expand Up @@ -119,7 +119,7 @@ let tests ~any_gen_of_string =
let pattern = any_gen_of_string spattern in
let code = any_gen_of_string scode in
let cache = None in
let lang = None in
let lang = Lang.Python in
let config = Config_semgrep.default_config in
let env =
Matching_generic.empty_environment cache lang config
Expand Down

0 comments on commit c93a25b

Please sign in to comment.