Skip to content

Commit

Permalink
tainting: Add new rule options
Browse files Browse the repository at this point in the history
1. `taint_assume_safe_functions` to assume that function calls do not
   propagate taint from their arguments to their result.
2. `taint_assume_safe_indexes` to assume that a tainted index does not
   cause an access expression to be tainted.

Both should help reducing FPs. Also, `taint_assume_safe_functions` is
meant to replace the `not_conflicting` option in `pattenr-sanitizers`,
but is not enough by itself. This e.g. `sink(not_tainted(tainted('a')))`
will still be flagged due to how sinks work right now (anything within
the range of the sink is considered a sink, also `tainted('a')`). We
may also need to generalize `pattern-propagators` so you can enumerate
the functions that propagate taint.

Closes PA-1541

test plan:
make test # added two tests
  • Loading branch information
IagoAbal committed Oct 17, 2022
1 parent ca6ecd2 commit 844e0f2
Show file tree
Hide file tree
Showing 17 changed files with 126 additions and 26 deletions.
14 changes: 14 additions & 0 deletions changelog.d/pa-1541.added
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
taint-mode: Added two new rule `options` that help minimizing false positives.

One is `taint_assume_safe_functions`, which makes Semgrep assume that function
calls do *NOT* propagate taint from their arguments to their output. Otherwise,
Semgrep always assumes that functions may propagate taint. This is intended to
replace _not conflicting_ sanitizers (added in v0.69.0) in the future, but this
will not happen in this release.

Another is `taint_assume_safe_indexes`, which makes Semgrep assume that an
array-access expression is safe even if the index expression is tainted. Otherwise
Semgrep assumes that e.g. `a[i]` is tainted if `i` is tainted, even if `a` is not.

Note that both options are disabled by default, that is, Semgrep makes worst-case
assumptions unless told otherwise. However, this may change in the future.
4 changes: 4 additions & 0 deletions interfaces/Config_semgrep.atd
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,12 @@ type t = {
~constant_propagation <ocaml default="true"> : bool;
(* symbolic_propagation requires constant_propagation to have effect *)
~symbolic_propagation <ocaml default="false"> : bool;

(* metavariables common to a source and sink will be unified *)
~taint_unify_mvars <ocaml default="false"> : bool;
~taint_assume_safe_functions <ocaml default="false"> : bool;
~taint_assume_safe_indexes <ocaml default="false"> : bool;

(* 'ac' stands for associative-commutative matching *)
~ac_matching <ocaml default="true"> : bool;
(* pretend && and || are commutative *)
Expand Down
2 changes: 1 addition & 1 deletion semgrep-core/src/engine/Match_env.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type id_to_match_results = (pattern_id, Pattern_match.t) Hashtbl.t
* they are not exposed to the user anymore.
*)
type xconfig = {
config : Config_semgrep.t;
config : Config_semgrep.t; (* corresponds to rule `options` key *)
equivs : Equivalence.equivalences;
(* Fields coming from Runner_config.t used by the engine.
* We could just include the whole Runner_config.t, but it's
Expand Down
3 changes: 3 additions & 0 deletions semgrep-core/src/engine/Match_rules.ml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ let check ~match_hook ~timeout ~timeout_threshold (xconf : Match_env.xconfig)
let res_rules, skipped_rules =
rules
|> Common.partition_either (fun r ->
let xconf =
Match_env.adjust_xconfig_with_rule_options xconf r.R.options
in
let relevant_rule =
if xconf.filter_irrelevant_rules then (
match Analyze_rule.regexp_prefilter_of_rule r with
Expand Down
1 change: 0 additions & 1 deletion semgrep-core/src/engine/Match_search_mode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -815,7 +815,6 @@ and matches_of_formula xconf rule xtarget formula opt_context :
(*****************************************************************************)

let check_rule ({ R.mode = `Search formula; _ } as r) hook xconf xtarget =
let xconf = Match_env.adjust_xconfig_with_rule_options xconf r.R.options in
let rule_id = fst r.id in
let res, final_ranges = matches_of_formula xconf r xtarget formula None in
let errors = res.errors |> Report.ErrorSet.map (error_with_rule_id rule_id) in
Expand Down
8 changes: 4 additions & 4 deletions semgrep-core/src/engine/Match_tainting_mode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ let pm_of_finding finding =
| T.ArgToReturn _ ->
None

let check_fundef lang taint_config opt_ent fdef =
let check_fundef lang options taint_config opt_ent fdef =
let name =
let* ent = opt_ent in
let* name = AST_to_IL.name_of_entity ent in
Expand Down Expand Up @@ -551,7 +551,7 @@ let check_fundef lang taint_config opt_ent fdef =
in
let _, xs = AST_to_IL.function_definition lang fdef in
let flow = CFG_build.cfg_of_stmts xs in
Dataflow_tainting.fixpoint ~in_env ?name taint_config flow |> ignore
Dataflow_tainting.fixpoint ~in_env ?name options taint_config flow |> ignore

let check_rule (rule : R.taint_rule) match_hook (xconf : Match_env.xconfig)
(xtarget : Xtarget.t) =
Expand Down Expand Up @@ -579,7 +579,7 @@ let check_rule (rule : R.taint_rule) match_hook (xconf : Match_env.xconfig)
in

(* Check each function definition. *)
Visit_function_defs.visit (check_fundef lang taint_config) ast;
Visit_function_defs.visit (check_fundef lang xconf.config taint_config) ast;

(* Check the top-level statements.
* In scripting languages it is not unusual to write code outside
Expand All @@ -589,7 +589,7 @@ let check_rule (rule : R.taint_rule) match_hook (xconf : Match_env.xconfig)
Common.with_time (fun () ->
let xs = AST_to_IL.stmt lang (G.stmt1 ast) in
let flow = CFG_build.cfg_of_stmts xs in
Dataflow_tainting.fixpoint taint_config flow |> ignore)
Dataflow_tainting.fixpoint xconf.config taint_config flow |> ignore)
in
let matches =
!matches
Expand Down
4 changes: 3 additions & 1 deletion semgrep-core/src/engine/Test_dataflow_tainting.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ let test_tainting lang file config def =

Common.pr2 "\nDataflow";
Common.pr2 "--------";
let mapping = Dataflow_tainting.fixpoint config flow in
let mapping =
Dataflow_tainting.fixpoint Config_semgrep.default_config config flow
in
let taint_to_str taint =
let show_taint t =
match t.Taint.orig with
Expand Down
37 changes: 27 additions & 10 deletions semgrep-core/src/tainting/Dataflow_tainting.ml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,13 @@ type mapping = Lval_env.t D.mapping

(* HACK: Tracks tainted functions intrafile. *)
type fun_env = (var, Taints.t) Hashtbl.t
type env = { config : config; fun_name : var option; lval_env : Lval_env.t }

type env = {
options : Config_semgrep.t;
config : config;
fun_name : var option;
lval_env : Lval_env.t;
}

(*****************************************************************************)
(* Hooks *)
Expand Down Expand Up @@ -468,7 +474,10 @@ let rec check_tainted_expr env exp : Taints.t * Lval_env.t =
| Mem e -> check env e
in
let check_offset env = function
| Index e -> check env e
| Index e ->
let taints, lval_env = check env e in
if env.options.taint_assume_safe_indexes then (Taints.empty, lval_env)
else (taints, lval_env)
| Dot fld -> check_tainted_tok env (snd fld.ident)
in
let check_subexpr exp =
Expand Down Expand Up @@ -607,9 +616,11 @@ let check_tainted_instr env instr : Taints.t * Lval_env.t =
match check_function_signature env e args_taints with
| Some call_taints -> call_taints
| None ->
(* Default is to assume that the function will propagate
* the taint of its arguments. *)
List.fold_left Taints.union e_taints all_taints
if env.options.taint_assume_safe_functions then e_taints
else
(* Default is to assume that the function will propagate
* the taint of its arguments. *)
List.fold_left Taints.union e_taints all_taints
in
(call_taints, lval_env)
| CallSpecial (_, _, args) ->
Expand Down Expand Up @@ -674,19 +685,20 @@ let input_env ~enter_env ~(flow : F.cfg) mapping ni =
| penv1 :: penvs -> List.fold_left Lval_env.union penv1 penvs)

let (transfer :
Config_semgrep.t ->
config ->
Lval_env.t ->
string option ->
flow:F.cfg ->
Lval_env.t D.transfn) =
fun config enter_env opt_name ~flow
fun options config enter_env opt_name ~flow
(* the transfer function to update the mapping at node index ni *)
mapping ni ->
(* DataflowX.display_mapping flow mapping show_tainted; *)
let in' : Lval_env.t = input_env ~enter_env ~flow mapping ni in
let node = flow.graph#nodes#assoc ni in
let out' : Lval_env.t =
let env = { config; fun_name = opt_name; lval_env = in' } in
let env = { options; config; fun_name = opt_name; lval_env = in' } in
match node.F.n with
| NInstr x -> (
let taints, lval_env' = check_tainted_instr env x in
Expand Down Expand Up @@ -753,8 +765,13 @@ let (transfer :
(*****************************************************************************)

let (fixpoint :
?in_env:Lval_env.t -> ?name:Var_env.var -> config -> F.cfg -> mapping) =
fun ?in_env ?name:opt_name config flow ->
?in_env:Lval_env.t ->
?name:Var_env.var ->
Config_semgrep.t ->
config ->
F.cfg ->
mapping) =
fun ?in_env ?name:opt_name options config flow ->
let init_mapping = DataflowX.new_node_array flow Lval_env.empty_inout in
let enter_env =
match in_env with
Expand All @@ -764,6 +781,6 @@ let (fixpoint :
(* THINK: Why I cannot just update mapping here ? if I do, the mapping gets overwritten later on! *)
(* DataflowX.display_mapping flow init_mapping show_tainted; *)
DataflowX.fixpoint ~eq_env:Lval_env.equal ~init:init_mapping
~trans:(transfer config enter_env opt_name ~flow)
~trans:(transfer options config enter_env opt_name ~flow)
(* tainting is a forward analysis! *)
~forward:true ~flow
7 changes: 6 additions & 1 deletion semgrep-core/src/tainting/Dataflow_tainting.mli
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,12 @@ val hook_function_taint_signature :
(** Deep Semgrep *)

val fixpoint :
?in_env:Taint_lval_env.t -> ?name:var -> config -> IL.cfg -> mapping
?in_env:Taint_lval_env.t ->
?name:var ->
Config_semgrep.t ->
config ->
IL.cfg ->
mapping
(** Main entry point, [fixpoint config cfg] returns a mapping (effectively a set)
* containing all the tainted variables in [cfg]. Besides, if it infers any taint
* 'findings', it will invoke [config.handle_findings] which can perform any
Expand Down
23 changes: 23 additions & 0 deletions semgrep-core/tests/rules/taint_assume_safe_funcs.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

function not_tainted($data) {
// do stuff
return '2';
}

//Problem here is that `tainted('a')` is in itself considered a sink because
//currently taint-mode considers anyting that falls within a sink-range to be
//a sink... and the sink range here is whatever it matches `sink(...)`!
//todook:tainted
sink(not_tainted(tainted('a')));

$ok = not_tainted(tainted('a'));
//OK:
sink($ok);

//ruleid:tainted
sink(tainted('b'));

$bad = tainted('b');
//ruleid:tainted
sink($bad);
14 changes: 14 additions & 0 deletions semgrep-core/tests/rules/taint_assume_safe_funcs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
rules:
- id: tainted
languages:
- php
message: Match
mode: taint
options:
taint_assume_safe_functions: true
pattern-sinks:
- pattern: sink(...)
pattern-sources:
- pattern: tainted(...)
severity: WARNING

5 changes: 5 additions & 0 deletions semgrep-core/tests/rules/taint_assume_safe_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
a = safe()
i = tainted()
x = a[i]
#ok:tainted
sink(x)
14 changes: 14 additions & 0 deletions semgrep-core/tests/rules/taint_assume_safe_indexes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
rules:
- id: tainted
languages:
- python
message: Match
mode: taint
options:
taint_assume_safe_indexes: true
pattern-sinks:
- pattern: sink(...)
pattern-sources:
- pattern: tainted(...)
severity: WARNING

Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ function not_tainted($data) {
return '2';
}

//OK:
//OK:tainted
sink(not_tainted(tainted('a')));

$ok = not_tainted(tainted('a'));
//OK:
//OK:tainted
sink($ok);

//ERROR:
//ruleid:tainted
sink(tainted('b'));

$bad = tainted('b');
//ERROR:
//ruleid:tainted
sink($bad);
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@ function not_tainted($data) {
return '2';
}

//OK:
//OK:tainted
sink(not_tainted(tainted('a')));

$ok = not_tainted(tainted('a'));
//OK:
//OK:tainted
sink($ok);

// $F(...) sanitizes *everything*
//OK:
//OK:tainted
sink(tainted('b'));

$bad = tainted('b');
// $F(...) sanitizes *everyting*
//OK:
//OK:tainted
sink($bad);

0 comments on commit 844e0f2

Please sign in to comment.