Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
IagoAbal committed May 3, 2024
1 parent 3427e03 commit d777bdd
Show file tree
Hide file tree
Showing 16 changed files with 630 additions and 276 deletions.
2 changes: 1 addition & 1 deletion libs/spacegrep/src/lib/Match.ml
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ let rec match_ (conf : conf) ~(dots : dots option) (env : env)
(cont :
dots:dots option -> env -> Loc.t -> Pattern_AST.node list -> match_result)
: match_result =
if !debug then Print_match.print pat doc;
(* if !debug then Print_match.print pat doc; *)
match (pat, doc) with
| [], doc -> (
match doc_matches_dots ~dots last_loc doc with
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Match_tainting_mode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ let check_var_def lang options taint_config env id ii expr =
in
let out_env = end_mapping.(flow.exit).Dataflow_core.out_env in
let lval : IL.lval = { base = Var name; rev_offset = [] } in
Lval_env.dumb_find out_env lval
Lval_env.find_lval_xtaint out_env lval

let add_to_env lang options taint_config env id ii opt_expr =
let var = AST_to_IL.var_of_id_info id ii in
Expand Down
568 changes: 380 additions & 188 deletions src/tainting/Dataflow_tainting.ml

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions src/tainting/Taint.ml
Original file line number Diff line number Diff line change
Expand Up @@ -682,3 +682,6 @@ let taints_of_pms ~incoming pms =
go (i + 1) taints' pms_left
in
go 0 Taint_set.empty pms

(* Stand-in index expression meaning "any index": the [Mult] operator applied
 * to an empty argument list, tagged with [AST_generic.fake "*"] and carrying
 * no original-expression link ([NoOrig]). *)
let lval_index_any : IL.exp =
  {
    IL.e = IL.Operator ((AST_generic.Mult, AST_generic.fake "*"), []);
    eorig = IL.NoOrig;
  }
1 change: 1 addition & 0 deletions src/tainting/Taint.mli
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,4 @@ val show_taints : taints -> string

val compare_matches : Pattern_match.t -> Pattern_match.t -> int
val compare_metavar_env : Metavariable.bindings -> Metavariable.bindings -> int
val lval_index_any : IL.exp
(** Index expression used as a stand-in for "any index": the [Mult] operator
    applied to an empty argument list. *)
81 changes: 52 additions & 29 deletions src/tainting/Taint_lval_env.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* to ensure that lvals satisfy IL_helpers.lval_is_var_and_dots, but rather handle
* that internally. *)

open Common
module T = Taint
module Taints = T.Taint_set
module S = Taint_shape
Expand Down Expand Up @@ -154,7 +155,9 @@ let normalize_lval lval =
|> List_.map_filter (fun o ->
match o.IL.o with
| IL.Dot _ -> Some o
| IL.Index _ -> (* no index-sensitivity in OSS *) None)
| IL.Index _ ->
(* no index-sensitivity in OSS *)
Some { o with o = IL.Index T.lval_index_any })
| Some normalize_rev_offset -> normalize_rev_offset rev_offset
in
Some (base, List.rev rev_offset)
Expand Down Expand Up @@ -203,7 +206,7 @@ let check_tainted_lvals_limit tainted new_var =
None)
else Some tainted

let add lval new_taints
let add_shape lval new_taints new_shape
({ tainted; control; taints_to_propagate; pending_propagation_dests } as
lval_env) =
match normalize_lval lval with
Expand All @@ -212,32 +215,37 @@ let add lval new_taints
variable. We just return the same environment untouched. *)
lval_env
| Some (var, offset) -> (
if Taints.is_empty new_taints then lval_env
else
match check_tainted_lvals_limit tainted var with
| None -> lval_env
| Some tainted ->
let new_taints =
let var_tok = snd var.ident in
if Tok.is_fake var_tok then new_taints
else
new_taints
|> Taints.map (fun t -> { t with tokens = var_tok :: t.tokens })
in
{
tainted =
NameMap.update var
(fun opt_var_ref ->
let var_ref =
opt_var_ref
|> Option.value ~default:(S.Ref (`None, S.Bot))
in
Some (S.taint_ref new_taints offset var_ref))
tainted;
control;
taints_to_propagate;
pending_propagation_dests;
})
match (Taints.is_empty new_taints, new_shape) with
| true, S.Bot -> lval_env
| __else__ -> (
match check_tainted_lvals_limit tainted var with
| None -> lval_env
| Some tainted ->
let new_taints =
let var_tok = snd var.ident in
if Tok.is_fake var_tok then new_taints
else
new_taints
|> Taints.map (fun t ->
{ t with tokens = var_tok :: t.tokens })
in
{
tainted =
NameMap.update var
(fun opt_var_ref ->
let var_ref =
opt_var_ref
|> Option.value ~default:(S.Ref (`None, S.Bot))
in
Some
(S.unify_ref_shape new_taints new_shape offset var_ref))
tainted;
control;
taints_to_propagate;
pending_propagation_dests;
}))

(* Add taints to an l-value without supplying any shape: this is 'add_shape'
 * called with the bottom shape 'S.Bot'. *)
let add lval new_taints lval_env =
  lval_env |> add_shape lval new_taints S.Bot

let propagate_to prop_var taints env =
(* THINK: Should we record empty propagations anyways so that we can always
Expand All @@ -262,10 +270,25 @@ let propagate_to prop_var taints env =

let find_var_opt { tainted; _ } var = NameMap.find_opt var tainted

let dumb_find { tainted; _ } lval =
(* Look up [lval] in the environment and return the ref recorded for it.
 * The result is [None] when the l-value cannot be normalized, when its base
 * variable has no entry in [tainted], or when 'S.find_in_ref' returns [None]
 * for the normalized offsets. *)
let find_lval { tainted; _ } lval =
  match normalize_lval lval with
  | None -> None
  | Some (var, offsets) -> (
      Logs.debug (fun m ->
          m ~tags:_tags "??? find_lval %s -> %s %s"
            (Display_IL.string_of_lval lval)
            (IL.str_of_name var)
            (Display_IL.string_of_offset_list offsets));
      match NameMap.find_opt var tainted with
      | None -> None
      | Some var_ref -> S.find_in_ref offsets var_ref)

(* Look up [lval] in the environment and return its xtaint.
 * Answers [`None] when the l-value cannot be normalized or when its base
 * variable has no entry in [tainted]; otherwise returns whatever
 * 'S.find_xtaint_ref' reports for the normalized offsets. *)
let find_lval_xtaint { tainted; _ } lval =
  let lookup (var, offsets) =
    Logs.debug (fun m ->
        m ~tags:_tags "??? find_lval_xtaint %s -> %s %s"
          (Display_IL.string_of_lval lval)
          (IL.str_of_name var)
          (Display_IL.string_of_offset_list offsets));
    NameMap.find_opt var tainted
    |> Option.fold ~none:`None ~some:(fun var_ref ->
           S.find_xtaint_ref offsets var_ref)
  in
  normalize_lval lval |> Option.fold ~none:`None ~some:lookup
Expand Down
5 changes: 4 additions & 1 deletion src/tainting/Taint_lval_env.mli
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ val hook_normalize_rev_offset : (IL.offset list -> IL.offset list) option ref

val empty : env
val empty_inout : env Dataflow_core.inout
val add_shape : IL.lval -> Taint.taints -> Taint_shape.shape -> env -> env
(** Like {!add}, but also unifies the given shape into the ref recorded for the
    l-value ([add] is [add_shape] with the bottom shape).

    The environment is returned unchanged when the l-value cannot be
    normalized, when the taint set is empty and the shape is [Bot], or when
    the tainted-lvals limit check rejects the variable. *)

val add : add_fn
(** Add taints to an l-value.
Expand All @@ -61,8 +62,10 @@ val add : add_fn
(* THINK: Perhaps keep propagators outside of this environment? *)
val propagate_to : Dataflow_var_env.var -> Taint.taints -> env -> env
val find_var_opt : env -> IL.name -> Taint_shape.ref option
val find_lval : env -> IL.lval -> Taint_shape.ref option
(** Look up an l-value in the environment and return the ref found at its
    (normalized) offset. Returns [None] if the l-value cannot be normalized,
    if its base variable is not in the environment, or if nothing is found
    at that offset. *)

val dumb_find : env -> IL.lval -> [ `Clean | `None | `Tainted of Taint.taints ]
val find_lval_xtaint :
env -> IL.lval -> [ `Clean | `None | `Tainted of Taint.taints ]
(** Look up an l-value in the environment and return whether it's tainted, clean,
or we hold no info about it. It does not check sub-lvalues, e.g. if we record
that 'x.a' is tainted but had no explicit info about 'x.a.b', checking for
Expand Down
158 changes: 127 additions & 31 deletions src/tainting/Taint_shape.ml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ and obj = ref Fields.t
(* Helpers *)
(*****************************************************************************)

(* Violates INVARIANT(ref), see 'unsafe_find_offset_in_obj *)
(* UNSAFE: Violates INVARIANT(ref), see 'unsafe_find_offset_in_obj'. *)
let ref_none_bot = Ref (`None, Bot)

(* Temporarily breaks 'unsafe_find_offset_in_obj' by initializing a field with a
Expand Down Expand Up @@ -142,6 +142,27 @@ and show_obj obj =
|> Seq.map (fun (o, o_ref) -> spf "%s: %s" (T.show_offset o) (show_ref o_ref))
|> List.of_seq |> String.concat "; "

(*****************************************************************************)
(* Object shapes *)
(*****************************************************************************)

(* Build an object shape from a positional list of (taints, shape) pairs.
 * The element at position [i] (counting from 0) is stored under the field
 * [T.Oint i]. Elements that have neither taints nor a non-bottom shape are
 * left out of the object, but they still consume their position. *)
let tuple_like_obj taints_and_shapes : obj =
  let add_entry (pos, fields) (taints, shape) =
    let xtaint = if Taints.is_empty taints then `None else `Tainted taints in
    let fields =
      match (xtaint, shape) with
      | `None, Bot ->
          (* See INVARIANT(ref) *)
          fields
      | __else__ -> Fields.add (T.Oint pos) (Ref (xtaint, shape)) fields
    in
    (pos + 1, fields)
  in
  let _next_pos, fields =
    List.fold_left add_entry (0, Fields.empty) taints_and_shapes
  in
  fields

(*****************************************************************************)
(* Union (merging shapes) *)
(*****************************************************************************)
Expand Down Expand Up @@ -170,33 +191,84 @@ and union_obj obj1 obj2 =
(*****************************************************************************)

(* THINK: Generalize to "fold" ? *)
let rec union_taints_in_ref_acc acc ref =
let (Ref (xtaint, shape)) = ref in
match xtaint with
| `Clean ->
(* Due to INVARIANT(ref) we can just stop here. *)
acc
| `None -> union_taints_in_shape_acc acc shape
| `Tainted taints -> union_taints_in_shape_acc (Taints.union taints acc) shape

let union_taints_in_ref =
let rec go_ref acc ref =
let (Ref (xtaint, shape)) = ref in
match xtaint with
| `Clean ->
(* Due to INVARIANT(ref) we can just stop here. *)
acc
| `None -> go_shape acc shape
| `Tainted taints -> go_shape (Taints.union taints acc) shape
and go_shape acc = function
| Bot -> acc
| Obj obj -> go_obj acc obj
and go_obj acc obj =
Fields.fold (fun _ o_ref acc -> go_ref acc o_ref) obj acc
in
go_ref Taints.empty
and union_taints_in_shape_acc acc = function
| Bot -> acc
| Obj obj -> union_taints_in_obj_acc acc obj

and union_taints_in_obj_acc acc obj =
Fields.fold (fun _ o_ref acc -> union_taints_in_ref_acc acc o_ref) obj acc

let union_taints_in_ref = union_taints_in_ref_acc Taints.empty
let union_taints_in_shape = union_taints_in_shape_acc Taints.empty

(*****************************************************************************)
(* Find xtaint for an offset *)
(* Find an offset *)
(*****************************************************************************)

let rec find_xtaint_ref offset ref =
(* Find the ref located at [offset] inside [ref].
 * With an empty offset the answer is [ref] itself, with one exception (see
 * below); with a non-empty offset the search continues in the ref's shape. *)
let rec find_in_ref offset ref =
  let (Ref (xtaint, shape)) = ref in
  match offset with
  | _ :: _ -> find_in_shape offset shape
  | [] -> (
      match shape with
      | Obj obj when Fields.cardinal obj =|= 1 -> (
          match Fields.find_opt Oany obj with
          | Some (Ref (index_xtaint, _)) ->
              (* Backwards compatibility "hack"
               * If it's an object where all fields are tainted, we also add
               * the taint here. *)
              Some (Ref (Xtaint.union xtaint index_xtaint, shape))
          | None -> Some ref)
      | _ -> Some ref)

(* Precondition: offset <> []. A bottom shape holds nothing to look into, so
 * the search fails; for an object shape it is delegated to 'find_in_obj'. *)
and find_in_shape offset shape =
  match shape with
  | Bot -> None
  | Obj obj -> find_in_obj offset obj

and find_in_obj offset obj =
(* offset <> [] *)
match offset with
| [] -> xtaint
| _ :: _ -> find_xtaint_shape offset shape
| [] ->
Logs.debug (fun m ->
m ~tags:error "fix_xtaint_obj: Impossible happened: empty offset");
None
| o :: offset -> (
match T.offset_of_IL o with
| Oany (* arbitrary index [*] *) ->
(* consider all fields/indexes *)
Fields.fold
(fun _ ref acc ->
match (acc, find_in_ref offset ref) with
| None, None -> None
| Some ref, None
| None, Some ref ->
Some ref
| Some ref1, Some ref2 -> Some (union_ref ref1 ref2))
obj None
| o -> (
match Fields.find_opt o obj with
| None -> None
| Some o_ref -> find_in_ref offset o_ref))

(* Find the xtaint located at [offset] inside [ref].
 * TODO: Define in terms of 'find_in_ref', what about the `[*]` case ? *)
let rec find_xtaint_ref offset ref =
  let (Ref (xtaint, shape)) = ref in
  match offset with
  | _ :: _ -> find_xtaint_shape offset shape
  | [] -> (
      (* TODO: Would need to do this in 'find_in_ref' too *)
      match shape with
      | Obj obj when Fields.cardinal obj =|= 1 -> (
          match Fields.find_opt Oany obj with
          | Some (Ref (index_xtaint, _)) ->
              (* If it's an object where all fields are tainted, we also add
               * the taint here. *)
              Xtaint.union xtaint index_xtaint
          | None -> xtaint)
      | _ -> xtaint)

and find_xtaint_shape offset = function
(* offset <> [] *)
Expand Down Expand Up @@ -230,8 +302,19 @@ and find_xtaint_obj offset obj =

let rec unsafe_update_ref f offset ref =
match (ref, offset) with
| Ref (xtaint, shape), [] -> Ref (f xtaint, shape)
| Ref (xtaint, shape), [] ->
let xtaint, shape = f xtaint shape in
Ref (xtaint, shape)
| Ref (xtaint, shape), _ :: _ ->
let xtaint =
(* If we are tainting an offset of this ref, the ref cannot be
considered clean anymore. *)
match xtaint with
| `Clean -> `None
| `None
| `Tainted _ ->
xtaint
in
let shape = unsafe_update_shape f offset shape in
Ref (xtaint, shape)

Expand Down Expand Up @@ -262,29 +345,38 @@ and unsafe_update_obj f offset obj =
(* Tainting an offset *)
(*****************************************************************************)

let taint_ref new_taints offset ref =
let add_new_taints = function
let unify_ref_shape new_taints new_shape offset ref =
let new_taints =
(* TODO: Probably Dataflow_tainting should be returning this. *)
if Taints.is_empty new_taints then `None else `Tainted new_taints
in
let add_new_taints xtaint shape =
let shape = union_shape new_shape shape in
match xtaint with
| `None
| `Clean ->
`Tainted new_taints
| `Tainted taints ->
(* Assumption: either new_taints has taint or shape has taints somewhere *)
(new_taints, shape)
| `Tainted taints as xtaint ->
if
!Flag_semgrep.max_taint_set_size =|= 0
|| Taints.cardinal taints < !Flag_semgrep.max_taint_set_size
then `Tainted (Taints.union new_taints taints)
then (Xtaint.union new_taints xtaint, shape)
else (
Logs.debug (fun m ->
m ~tags:warning
"Already tracking too many taint sources for %s, will not \
track more"
(Display_IL.string_of_offset_list offset));
`Tainted taints)
(`Tainted taints, shape))
in
if Taints.is_empty new_taints then
Logs.debug (fun m ->
m ~tags:error "taint_ref: Impossible happened: empty taint set");
(* if Taints.is_empty new_taints then
Logs.debug (fun m ->
m ~tags:error "taint_ref: Impossible happened: empty taint set"); *)
unsafe_update_ref add_new_taints offset ref

(* Taint the given offset of a ref without supplying a shape: this is
 * 'unify_ref_shape' called with 'Bot' as the new shape. *)
let taint_ref new_taints offset ref =
  ref |> unify_ref_shape new_taints Bot offset

(*****************************************************************************)
(* Clean taint *)
(*****************************************************************************)
Expand All @@ -300,6 +392,10 @@ let rec clean_ref offset ref =
* mark from other refs that may be pointing to this ref in order to
* maintain the invariant ? *)
Ref (`Clean, Bot)
| [ { IL.o = IL.Index { e = Operator ((G.Mult, _), []); _ }; _ } ] ->
(* If an object is tainted, and we clean all its indexes, then we instead
* clean the object itself. *)
Ref (`Clean, Bot)
| _ :: _ ->
let shape = clean_shape offset shape in
Ref (xtaint, shape)
Expand Down
Loading

0 comments on commit d777bdd

Please sign in to comment.