Skip to content

Commit

Permalink
taint-mode: Check variables as if they were subexpressions (#4323)
Browse files Browse the repository at this point in the history
If the LHS of `->foo` was a sink, and we encountered `x->foo` where `x`
was a variable, we did not report a finding even if `x` was tainted. We
were not checking whether `x` was in a sink position.

Although they are not represented as such in the IL, variables are
themselves subexpressions, so we must check whether they are sinks or
sanitized.

Closes #4320

test plan:
make test # test included
  • Loading branch information
IagoAbal committed Nov 30, 2021
1 parent 7479a77 commit 0405a6d
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 14 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Expand Up @@ -11,6 +11,9 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html
- Java: class patterns not using generics will match classes using generics
(#4335), e.g., `class $X { ...}` will now match `class Foo<T> { }`
- TS: parse correctly type definitions (#4330)
- taint-mode: Findings are now reported when the LHS of an access operator is
a sink (e.g. as in `$SINK->method`), and the LHS operand is a tainted
variable (#4320)

### Changed
- semgrep-core: Log messages are now tagged with the process id
Expand Down
38 changes: 24 additions & 14 deletions semgrep-core/src/engine/Dataflow_tainting.ml
Expand Up @@ -132,26 +132,36 @@ let make_tainted_sink_matches sink_pms src_pms =
(* Tainted *)
(*****************************************************************************)

(* Test whether an expression is tainted, and if it is also a sink,
* report the finding too (by side effect).
*
*)
let check_tainted_var config (fun_env : fun_env) (env : PM.Set.t VarMap.t) var =
let source_pms, sanitize_pms, sink_pms =
let _, tok = var.ident in
if Parse_info.is_origintok tok then
( config.is_source (G.Tk tok),
config.is_sanitizer (G.Tk tok),
config.is_sink (G.Tk tok) )
else ([], [], [])
in
let tainted_pms =
PM.Set.of_list source_pms
|> PM.Set.union (set_opt_to_set (VarMap.find_opt (str_of_name var) env))
|> PM.Set.union
(set_opt_to_set (Hashtbl.find_opt fun_env (str_of_name var)))
in
match sanitize_pms with
| _ :: _ -> PM.Set.empty
| [] ->
let found = make_tainted_sink_matches sink_pms tainted_pms in
if not (PM.Set.is_empty found) then config.found_tainted_sink found env;
tainted_pms

(* Test whether an expression is tainted, and if it is also a sink,
* report the finding too (by side effect). *)
let rec check_tainted_expr config (fun_env : fun_env) (env : PM.Set.t VarMap.t)
exp =
let check = check_tainted_expr config fun_env env in
let sink_pms = config.is_sink (G.E exp.eorig) in
let check_base = function
| Var var ->
let var_tok_pms =
let _, tok = var.ident in
if Parse_info.is_origintok tok then config.is_source (G.Tk tok)
else []
in
PM.Set.of_list var_tok_pms
|> PM.Set.union (set_opt_to_set (VarMap.find_opt (str_of_name var) env))
|> PM.Set.union
(set_opt_to_set (Hashtbl.find_opt fun_env (str_of_name var)))
| Var var -> check_tainted_var config fun_env env var
| VarSpecial _ -> PM.Set.empty
| Mem e -> check e
in
Expand Down
11 changes: 11 additions & 0 deletions semgrep-core/tests/tainting_rules/php/lval_var_sink.php
@@ -0,0 +1,11 @@
<?php
// https://github.com/returntocorp/semgrep/issues/4320
$doc = new DOMDocument();
// We did not catch the error when the sink was the variable in an l-value,
// because we assumed that only expressions or instructions could be sinks.
//ERROR:
$doc->load('file.xml');
//ERROR:
$doc->load($doc);
//OK:
$something->load($doc);
15 changes: 15 additions & 0 deletions semgrep-core/tests/tainting_rules/php/lval_var_sink.yaml
@@ -0,0 +1,15 @@
rules:
- id: domdocument-load
severity: INFO
message: DOMDocument::load
languages:
- php
mode: taint
pattern-sources:
- patterns:
- pattern: new DOMDocument(...)
pattern-sinks:
- patterns:
- pattern-inside: $DOMDOCUMENT->load($FILENAME, ...)
- pattern: $DOMDOCUMENT

0 comments on commit 0405a6d

Please sign in to comment.