From 500e0aced6e9a9e2fd426966e85109ce26ab5f1d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 1 Mar 2022 17:14:35 +0100 Subject: [PATCH 01/44] Python: Rewrite sax XML tests The tests for type-trackers were not that interesting, since they did not have XML input in both cases, which is the problem we were trying hard to solve. I did keep the test-case of not-user-supplied url alive as well though :+1: I added OK/NOT OK annotations. Notice that we report all 4 kinds of vulnerabilities on line 93 --- .../CWE-611/XmlEntityInjection.expected | 82 +++++++++++-------- .../Security/CWE-611/xml_sax_make_parser.py | 37 +++++---- 2 files changed, 67 insertions(+), 52 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 081a8c6e6af8..2e291875ce81 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -71,18 +71,19 @@ edges | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | -| 
xml_sax_make_parser.py:69:19:69:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:73:34:73:54 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:79:19:79:25 | ControlFlowNode for request | xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:86:22:86:42 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | +| 
xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | | xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | @@ -186,22 +187,23 @@ nodes | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| 
xml_sax_make_parser.py:57:19:57:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:57:19:57:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:69:19:69:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:69:19:69:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:69:19:69:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:73:34:73:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:79:19:79:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:79:19:79:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:79:19:79:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:86:22:86:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:91:19:91:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:91:19:91:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for 
Attribute | +| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | | xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for 
Attribute | | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | @@ -231,13 +233,21 @@ subpaths | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:63:18:63:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:57:19:57:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:98:22:98:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:91:19:91:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py index 9f858d99ddd2..e95abf753796 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py @@ -13,7 +13,7 @@ def characters(self, data): self._result.append(data) def parse(self, f): - xml.sax.parse(f, self) + xml.sax.parse(f, self) # OK for XXE/DTD, NOT OK for billion laughs/quadratic return self._result # GOOD @@ -33,7 +33,7 @@ def xml_makeparser_MainHandler(): GoodHandler = MainHandler() parser = xml.sax.make_parser() parser.setContentHandler(GoodHandler) - parser.parse(StringIO(xml_content)) + parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic return GoodHandler._result @@ -46,11 +46,17 @@ def xml_makeparser_MainHandler_entitiesFalse(): parser.setContentHandler(GoodHandler) # https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(xml_content)) + parser.parse(StringIO(xml_content)) # # OK for XXE/DTD, NOT OK for billion laughs/quadratic return GoodHandler._result -# BAD +@app.route("not-user-controlled") +def not_user_controlled(): + parser = 
xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse("/not-user-controlled/default_config.xml") # OK + return +# BAD @app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True") def xml_makeparser_MainHandler_entitiesTrue(): @@ -60,7 +66,7 @@ def xml_makeparser_MainHandler_entitiesTrue(): parser = xml.sax.make_parser() parser.setContentHandler(BadHandler) parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(xml_content)) + parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic return BadHandler._result @@ -70,7 +76,8 @@ def xml_makeparser_minidom_entitiesTrue(): parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) - return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes + doc = xml.dom.minidom.parse(StringIO(xml_content), parser=parser) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic + return doc.documentElement.childNodes # Forward Type Tracking test @@ -80,20 +87,18 @@ def forward_tracking1(action): parser = xml.sax.make_parser() if action == 'load-config': - parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse("/not-user-controlled/default_config.xml") + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic else: - parser.parse(StringIO(xml_content)) - return + parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic + return @app.route("forward_tracking2") def forward_tracking2(action): xml_content = request.args['xml_content'] parser = xml.sax.make_parser() - if action == 'load-config': - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse("/not-user-controlled/default_config.xml") - else: - parser.parse(StringIO(xml_content)) - return + 
parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.setFeature(xml.sax.handler.feature_external_ges, False) + parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic + return From ee23c05489deb55626fe0402760ff89535856c84 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 1 Mar 2022 17:15:19 +0100 Subject: [PATCH 02/44] Python: XML: Expose vuln kind on sink --- .../Security/CWE-611/XmlEntityInjection.ql | 8 ++- .../security/dataflow/XmlEntityInjection.qll | 18 ------- .../XmlEntityInjectionCustomizations.qll | 53 +++++++------------ 3 files changed, 25 insertions(+), 54 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql index 8f22ded4b157..03f0c7b1c0e9 100644 --- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql +++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql @@ -15,8 +15,12 @@ import python import experimental.semmle.python.security.dataflow.XmlEntityInjection import DataFlow::PathGraph -from DataFlow::PathNode source, DataFlow::PathNode sink, string kind -where XmlEntityInjection::xmlEntityInjectionVulnerable(source, sink, kind) +from + XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source, + DataFlow::PathNode sink, string kind +where + config.hasFlowPath(source, sink) and + kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind() select sink.getNode(), source, sink, "$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(), "This", source.getNode(), "user-provided value" diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll index 087c3057640e..35220e153d12 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll 
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll @@ -25,22 +25,4 @@ module XmlEntityInjection { any(AdditionalTaintStep s).step(nodeFrom, nodeTo) } } - - private import DataFlow::PathGraph - - /** Holds if there is an XML injection from `source` to `sink` */ - predicate xmlEntityInjection(DataFlow::PathNode source, DataFlow::PathNode sink) { - any(XmlEntityInjectionConfiguration x).hasFlowPath(source, sink) - } - - /** Holds if there is an XML injection from `source` to `sink` vulnerable to `kind` */ - predicate xmlEntityInjectionVulnerable( - DataFlow::PathNode source, DataFlow::PathNode sink, string kind - ) { - xmlEntityInjection(source, sink) and - ( - xmlParsingInputAsVulnerableSink(sink.getNode(), kind) or - xmlParserInputAsVulnerableSink(sink.getNode(), kind) - ) - } } diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll index 8f8b3ae2c6ab..7de0c0c4b9c2 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -24,7 +24,10 @@ module XmlEntityInjection { /** * A data flow sink for "xml injection" vulnerabilities. */ - abstract class Sink extends DataFlow::Node { } + abstract class Sink extends DataFlow::Node { + /** Gets the kind of XML injection that this sink is vulnerable to. */ + abstract string getVulnerableKind(); + } /** * A sanitizer guard for "xml injection" vulnerabilities. @@ -46,53 +49,35 @@ module XmlEntityInjection { } /** - * A data flow sink for XML parsing libraries. + * An input to a direct XML parsing function, considered as a flow sink. * * See `XML::XMLParsing`. 
*/ - abstract class XMLParsingSink extends Sink { } + class XMLParsingInputAsSink extends Sink { + XML::XMLParsing xmlParsing; - /** - * A data flow sink for XML parsers. - * - * See `XML::XMLParser` - */ - abstract class XMLParserSink extends Sink { } + XMLParsingInputAsSink() { this = xmlParsing.getAnInput() } - /** - * A source of remote user input, considered as a flow source. - */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } - - /** - * An xml parsing operation, considered as a flow sink. - */ - class XMLParsingInputAsSink extends XMLParsingSink { - XMLParsingInputAsSink() { this = any(XML::XMLParsing xmlParsing).getAnInput() } + override string getVulnerableKind() { xmlParsing.vulnerable(result) } } /** - * An xml parsing operation vulnerable to `kind`. + * An input to an XML parser, considered as a flow sink. + * + * See `XML::XMLParser` */ - predicate xmlParsingInputAsVulnerableSink(DataFlow::Node sink, string kind) { - exists(XML::XMLParsing xmlParsing | - sink = xmlParsing.getAnInput() and xmlParsing.vulnerable(kind) - ) - } + class XMLParserInputAsSink extends Sink { + XML::XMLParser xmlParser; - /** - * An xml parser operation, considered as a flow sink. - */ - class XMLParserInputAsSink extends XMLParserSink { - XMLParserInputAsSink() { this = any(XML::XMLParser xmlParser).getAnInput() } + XMLParserInputAsSink() { this = xmlParser.getAnInput() } + + override string getVulnerableKind() { xmlParser.vulnerable(result) } } /** - * An xml parser operation vulnerable to `kind`. + * A source of remote user input, considered as a flow source. */ - predicate xmlParserInputAsVulnerableSink(DataFlow::Node sink, string kind) { - exists(XML::XMLParser xmlParser | sink = xmlParser.getAnInput() and xmlParser.vulnerable(kind)) - } + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } /** * A comparison with a constant string, considered as a sanitizer-guard. 
From aaf55b21c46dbd3e6a84204d0a43f39ec32d85fe Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Mar 2022 10:58:58 +0100 Subject: [PATCH 03/44] Python: Add XMLVulnerabilityKind This gives some freedom in changing the name presented, and not worrying about whether you have made a typo that makes everything break :| --- .../experimental/semmle/python/Concepts.qll | 31 ++++++++++++++-- .../semmle/python/frameworks/Xml.qll | 37 ++++++++++--------- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 09f8e7897c58..4cdd803c9327 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -15,6 +15,29 @@ private import semmle.python.dataflow.new.TaintTracking private import experimental.semmle.python.Frameworks module XML { + /** + * A kind of XML vulnerability. + * + * See https://pypi.org/project/defusedxml/#python-xml-libraries + */ + class XMLVulnerabilityKind extends string { + XMLVulnerabilityKind() { + this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval",] + } + + /** Holds for Billion Laughs vulnerability kind. */ + predicate isBillionLaughs() { this = "Billion Laughs" } + + /** Holds for Quadratic Blowup vulnerability kind. */ + predicate isQuadraticBlowup() { this = "Quadratic Blowup" } + + /** Holds for XXE vulnerability kind. */ + predicate isXxe() { this = "XXE" } + + /** Holds for DTD retrieval vulnerability kind. */ + predicate isDtdRetrieval() { this = "DTD retrieval" } + } + /** * A data-flow node that collects functions parsing XML. * @@ -30,7 +53,7 @@ module XML { /** * Holds if the parsing method or the parser holding it is vulnerable to `kind`. 
*/ - predicate vulnerable(string kind) { super.vulnerable(kind) } + predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } } /** Provides classes for modeling XML parsing APIs. */ @@ -50,7 +73,7 @@ module XML { /** * Holds if the parsing method or the parser holding it is vulnerable to `kind`. */ - abstract predicate vulnerable(string kind); + abstract predicate vulnerable(XMLVulnerabilityKind kind); } } @@ -69,7 +92,7 @@ module XML { /** * Holds if the parser is vulnerable to `kind`. */ - predicate vulnerable(string kind) { super.vulnerable(kind) } + predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } } /** Provides classes for modeling XML parsers. */ @@ -89,7 +112,7 @@ module XML { /** * Holds if the parser is vulnerable to `kind`. */ - abstract predicate vulnerable(string kind); + abstract predicate vulnerable(XMLVulnerabilityKind kind); } } } diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index cf4abbac995b..ffd8d44ba35c 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -24,7 +24,7 @@ private module Xml { override DataFlow::Node getAnInput() { none() } - override predicate vulnerable(string kind) { none() } + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { none() } } /** @@ -57,7 +57,7 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } - override predicate vulnerable(string kind) { + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { exists(XML::XMLParser xmlParser | xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) @@ -111,27 +111,27 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) } - override predicate vulnerable(string kind) { + override predicate 
vulnerable(XML::XMLVulnerabilityKind kind) { exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature | handler = API::moduleImport("xml").getMember("sax").getMember("handler") and parse.calls(trackSaxFeature(this, feature), "parse") and parse.getArg(0) = this.getAnInput() // enough to avoid FPs? | - kind = ["XXE", "DTD retrieval"] and + (kind.isXxe() or kind.isDtdRetrieval()) and feature = handler.getMember("feature_external_ges") or - kind = ["Billion Laughs", "Quadratic Blowup"] + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) ) } - predicate vulnerable(DataFlow::Node n, string kind) { + predicate vulnerable(DataFlow::Node n, XML::XMLVulnerabilityKind kind) { exists(API::Node handler, API::Node feature | handler = API::moduleImport("xml").getMember("sax").getMember("handler") and DataFlow::exprNode(trackSaxFeature(this, feature).asExpr()) .(DataFlow::LocalSourceNode) .flowsTo(n) | - kind = ["XXE", "DTD retrieval"] and + (kind.isXxe() or kind.isDtdRetrieval()) and feature = handler.getMember("feature_external_ges") ) } @@ -162,14 +162,14 @@ private module Xml { override DataFlow::Node getAnInput() { none() } - override predicate vulnerable(string kind) { - kind = "XXE" and + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isXxe() and not ( exists(this.getArgByName("resolve_entities")) or this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) ) or - kind = ["Billion Laughs", "Quadratic Blowup"] and + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and ( this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) @@ -206,12 +206,12 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } - override predicate vulnerable(string kind) { + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { exists(XML::XMLParser xmlParser | xmlParser = 
this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) or - kind = "XXE" and not exists(this.getArgByName("parser")) + kind.isXxe() and not exists(this.getArgByName("parser")) } } @@ -233,8 +233,8 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } - override predicate vulnerable(string kind) { - kind = ["Billion Laughs", "Quadratic Blowup"] and + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } } @@ -266,12 +266,13 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } - override predicate vulnerable(string kind) { + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { exists(XML::XMLParser xmlParser | xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) or - kind = ["Billion Laughs", "Quadratic Blowup"] and not exists(this.getArgByName("parser")) + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + not exists(this.getArgByName("parser")) } } @@ -300,6 +301,8 @@ private module Xml { result = this.getAMethodCall("register_function").getArg(0) } - override predicate vulnerable(string kind) { kind = ["Billion Laughs", "Quadratic Blowup"] } + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } } } From 16e482bf6fcb07bc11a77ca1e82c65baf41c1ac8 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Mar 2022 11:53:02 +0100 Subject: [PATCH 04/44] Python: Improve QLDoc for XML parsing/parsers --- python/ql/src/experimental/semmle/python/Concepts.qll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 4cdd803c9327..22616c0a5d2b 100644 --- 
a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -39,7 +39,7 @@ module XML { } /** - * A data-flow node that collects functions parsing XML. + * A data-flow node that parses XML. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. @@ -59,7 +59,7 @@ module XML { /** Provides classes for modeling XML parsing APIs. */ module XMLParsing { /** - * A data-flow node that collects functions parsing XML. + * A data-flow node that parses XML. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. @@ -78,7 +78,7 @@ module XML { } /** - * A data-flow node that collects XML parsers. + * A data-flow node that constructs an XML parser. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParser` instead. @@ -98,7 +98,7 @@ module XML { /** Provides classes for modeling XML parsers. */ module XMLParser { /** - * A data-flow node that collects XML parsers. + * A data-flow node that constructs an XML parser. * * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParser` instead. From 6dd776b2de9e6eede27d2cc22d9781db4fe8a83d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Mar 2022 14:52:11 +0100 Subject: [PATCH 05/44] Python: Only produce one alert per vulnerable XML sink This made it much easier to debug the current alerts on tests at least. 
Notice that it's important that we have `strictconcat` and not just `concat`, since `concat` will also allow flow to sinks that are not vulnerable to any kind of XML vulnerability :| --- .../Security/CWE-611/XmlEntityInjection.ql | 11 +++- .../CWE-611/XmlEntityInjection.expected | 55 ++++++------------- 2 files changed, 26 insertions(+), 40 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql index 03f0c7b1c0e9..922ca346b173 100644 --- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql +++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql @@ -17,10 +17,15 @@ import DataFlow::PathGraph from XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source, - DataFlow::PathNode sink, string kind + DataFlow::PathNode sink, string kinds where config.hasFlowPath(source, sink) and - kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind() + kinds = + strictconcat(string kind | + kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind() + | + kind, ", " + ) select sink.getNode(), source, sink, - "$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(), + "$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(), "This", source.getNode(), "user-provided value" diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 2e291875ce81..6c342ef223ea 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -214,40 +214,21 @@ nodes | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| 
lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to DTD retrieval. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to XXE. 
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Billion Laughs. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | -| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | From 7f7758b83dc1ae6a3e528cf6b3b7349e60fd3e56 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 2 Mar 2022 13:57:28 +0100 Subject: [PATCH 06/44] Python: rewrite xml sax modeling --- .../semmle/python/frameworks/Xml.qll | 134 ++++++++++++------ .../CWE-611/XmlEntityInjection.expected | 6 +- 2 files changed, 93 insertions(+), 47 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index ffd8d44ba35c..1a01bf4c5c8f 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -64,32 +64,90 @@ private module Xml { } } - /** Gets a reference to a `parser` that has been set a `feature`. */ - private DataFlow::Node trackSaxFeature( - DataFlow::TypeTracker t, DataFlow::CallCfgNode parser, API::Node feature + /** + * A call to the `setFeature` method on a XML sax parser. + * + * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature + */ + class SaxParserSetFeatureCall extends DataFlow::MethodCallNode { + SaxParserSetFeatureCall() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("setFeature") + .getACall() + } + + // The keyword argument names does not match documentation. I checked (with Python + // 3.9.5) that the names used here actually works. + DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] } + + DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] } + } + + /** Gets a back-reference to the `setFeature` state argument `arg`. 
*/ + private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker( + DataFlow::TypeBackTracker t, DataFlow::Node arg ) { t.start() and - exists(DataFlow::MethodCallNode featureCall | - featureCall = parser.getAMethodCall("setFeature") and - featureCall.getArg(0).getALocalSource() = feature.getAUse() and - featureCall.getArg(1).getALocalSource() = DataFlow::exprNode(any(True t_)) and - result = featureCall.getObject() + arg = any(SaxParserSetFeatureCall c).getStateArg() and + result = arg.getALocalSource() + or + exists(DataFlow::TypeBackTracker t2 | + result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t) + ) + } + + /** Gets a back-reference to the `setFeature` state argument `arg`. */ + DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) { + result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg) + } + + /** Gets a reference to a XML sax parser that has `feature_external_ges` turned on */ + private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { + t.start() and + exists(SaxParserSetFeatureCall call | + call.getFeatureArg() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + saxParserSetFeatureStateArgBacktracker(call.getStateArg()) + .asExpr() + .(BooleanLiteral) + .booleanValue() = true and + result = call.getObject() ) or exists(DataFlow::TypeTracker t2 | - t = t2.smallstep(trackSaxFeature(t2, parser, feature), result) + t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result) + ) and + // take account of that we can set the feature to False, which makes the parser safe again + not exists(SaxParserSetFeatureCall call | + call.getObject() = result and + call.getFeatureArg() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + 
saxParserSetFeatureStateArgBacktracker(call.getStateArg()) + .asExpr() + .(BooleanLiteral) + .booleanValue() = false ) } - /** Gets a reference to a `parser` that has been set a `feature`. */ - DataFlow::Node trackSaxFeature(DataFlow::CallCfgNode parser, API::Node feature) { - result = trackSaxFeature(DataFlow::TypeTracker::end(), parser, feature) + /** Gets a reference to an XML sax parser that has `feature_external_ges` turned on. */ + DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() { + result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) } /** - * Gets a call to `xml.sax.make_parser`. - * - * Given the following example: + * An XML parsing call with a sax parser. * * ```py * BadHandler = MainHandler() @@ -99,41 +157,27 @@ private module Xml { * parser.parse(StringIO(xml_content)) * parsed_xml = BadHandler._result * ``` - * - * * `this` would be `xml.sax.make_parser()`. - * * `getAnInput()`'s result would be `StringIO(xml_content)`. - * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. 
*/ - private class XMLSaxParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { - XMLSaxParser() { - this = API::moduleImport("xml").getMember("sax").getMember("make_parser").getACall() + private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxParsing() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("parse") + .getACall() } - override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) } + override DataFlow::Node getAnInput() { result = this.getArg(0) } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature | - handler = API::moduleImport("xml").getMember("sax").getMember("handler") and - parse.calls(trackSaxFeature(this, feature), "parse") and - parse.getArg(0) = this.getAnInput() // enough to avoid FPs? - | - (kind.isXxe() or kind.isDtdRetrieval()) and - feature = handler.getMember("feature_external_ges") - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - ) - } - - predicate vulnerable(DataFlow::Node n, XML::XMLVulnerabilityKind kind) { - exists(API::Node handler, API::Node feature | - handler = API::moduleImport("xml").getMember("sax").getMember("handler") and - DataFlow::exprNode(trackSaxFeature(this, feature).asExpr()) - .(DataFlow::LocalSourceNode) - .flowsTo(n) - | - (kind.isXxe() or kind.isDtdRetrieval()) and - feature = handler.getMember("feature_external_ges") - ) + // always vuln to these + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or + // can be vuln to other things if features has been turned on + this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 
6c342ef223ea..0109566be06d 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -227,8 +227,10 @@ subpaths | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | From 515b824b3cd857dc29a3484817a1a0d170bae2f6 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 09:42:19 +0100 Subject: [PATCH 07/44] Python: Add lxml positive test --- .../CWE-611/XmlEntityInjection.expected | 23 ++++++++++++------- .../Security/CWE-611/lxml_etree.py | 8 +++++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 0109566be06d..634e7dd28d7c 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -20,12 +20,15 @@ edges | lxml_etree.py:54:19:54:25 | ControlFlowNode for request | lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | | lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | -| lxml_etree.py:65:19:65:25 | ControlFlowNode for request | lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | -| lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | -| lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | lxml_etree.py:68:34:68:44 | ControlFlowNode for xml_content | +| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | +| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | +| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | 
lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | +| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | +| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | +| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | @@ -119,14 +122,18 @@ nodes | lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:65:19:65:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:65:19:65:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:65:19:65:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:68:34:68:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:62:19:62:25 | ControlFlowNode for 
request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | | xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | @@ -220,7 +227,7 @@ subpaths | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | This | lxml_etree.py:73:19:73:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. 
| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | This | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py index 2c3c6f5f2ffc..231116c2b720 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py @@ -56,6 +56,14 @@ def lxml_parser(): parser = lxml.etree.XMLParser(resolve_entities=False) return lxml.etree.fromstring(xml_content, parser=parser).text +# XXE-vuln +@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") +def lxml_parser(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser(resolve_entities=True) + return lxml.etree.fromstring(xml_content, parser=parser).text + # Billion laughs and quadratic blowup (huge_tree) ## Good (huge_tree=True but resolve_entities=False) From 661d8bf553778aa49054347e215015877c3876a5 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 09:45:42 +0100 Subject: [PATCH 08/44] Python: Better handling of `resolve_entities` arg in lxml --- .../ql/src/experimental/semmle/python/frameworks/Xml.qll | 9 ++++++--- .../Security/CWE-611/XmlEntityInjection.expected | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 1a01bf4c5c8f..6f865e13cdb1 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -206,11 +206,14 @@ private module Xml { override DataFlow::Node getAnInput() { none() } + // NOTE: it's not possible to change settings of a parser after constructing it override predicate vulnerable(XML::XMLVulnerabilityKind kind) { kind.isXxe() and - not ( - 
exists(this.getArgByName("resolve_entities")) or - this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) + ( + // resolve_entities has default True + not exists(this.getArgByName("resolve_entities")) + or + this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) ) or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 634e7dd28d7c..86edcb89d4da 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -227,6 +227,7 @@ subpaths | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | This | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | From 52891cb4763bc3714a2e3cc95ea240145d55d910 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 12:36:24 +0100 Subject: [PATCH 09/44] Python: Add PoC for XML vulns --- .../Security/CWE-611/dont_extract/PoC.py | 449 ++++++++++++++++++ .../Security/CWE-611/dont_extract/flag | 1 + 2 files changed, 450 insertions(+) create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py new file mode 100644 index 000000000000..85301c32bff6 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py @@ -0,0 +1,449 @@ +#!/usr/bin/env python3 + +# this file doesn't have a .py extension so the extractor doesn't pick it up, so it +# doesn't have to be annotated + +# This file shows the ways to make exploit vulnerable XML parsing +# see +# https://pypi.org/project/defusedxml/#python-xml-libraries +# https://docs.python.org/3.10/library/xml.html#xml-vulnerabilities + +import pathlib +from flask import Flask +import threading +import multiprocessing +import time +from io import StringIO +import pytest + +HOST = "localhost" +PORT = 8080 + + +FLAG_PATH = pathlib.Path(__file__).with_name("flag") + +# ============================================================================== +# xml samples + +ok_xml = f""" +hello world +""" + +local_xxe = f""" + +]> +&xxe; +""" + +remote_xxe = f""" + +]> +&remote_xxe; +""" + +billion_laughs = """ + + + + + + + + + + + +]> +&lol9;""" + +quadratic_blowup = f""" + +]> +{"&oops;"*20000}""" + +dtd_retrieval = f""" + +bar +""" + +# 
============================================================================== +# other setup + +# we set up a local Flask application so we can test whether loading external resources +# works (such as SSRF via DTD retrieval) +app = Flask(__name__) + +@app.route("/alive") +def alive(): + return "ok" + +hit_dtd = False +@app.route("/test.dtd") +def test_dtd(): + global hit_dtd + hit_dtd = True + return """""" + +hit_xxe = False +@app.route("/xxe") +def test_xxe(): + global hit_xxe + hit_xxe = True + return "ok" + +def run_app(): + app.run(host=HOST, port=PORT) + +@pytest.fixture(scope="session", autouse=True) +def flask_app_running(): + # run flask in other thread + flask_thread = threading.Thread(target=run_app, daemon=True) + flask_thread.start() + + # give flask a bit of time to start + time.sleep(0.1) + + # ensure that the server works + import requests + requests.get(f"http://{HOST}:{PORT}/alive") + + yield + +def expects_timeout(func): + def inner(): + proc = multiprocessing.Process(target=func) + proc.start() + time.sleep(0.1) + assert proc.exitcode == None + proc.kill() + proc.join() + return inner + + +class TestExpectsTimeout: + "test that expects_timeout works as expected" + + @staticmethod + @expects_timeout + def test_slow(): + time.sleep(1000) + + @staticmethod + def test_fast(): + @expects_timeout + def fast_func(): + return "done!" 
+ + with pytest.raises(AssertionError): + fast_func() + +# ============================================================================== +import xml.sax + +class SimpleHandler(xml.sax.ContentHandler): + def __init__(self): + self.result = [] + + def characters(self, data): + self.result.append(data) + +class TestSax(): + # always vuln to billion laughs, quadratic + + @staticmethod + @expects_timeout + def test_billion_laughs_allowed_by_default(): + parser = xml.sax.make_parser() + parser.parse(StringIO(billion_laughs)) + + @staticmethod + @expects_timeout + def test_quardratic_blowup_allowed_by_default(): + parser = xml.sax.make_parser() + parser.parse(StringIO(quadratic_blowup)) + + @staticmethod + def test_ok_xml(): + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.parse(StringIO(ok_xml)) + assert handler.result == ["hello world"], handler.result + + @staticmethod + def test_xxe_disabled_by_default(): + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.parse(StringIO(local_xxe)) + assert handler.result == [], handler.result + + @staticmethod + def test_local_xxe_manually_enabled(): + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(local_xxe)) + assert handler.result[0] == "SECRET_FLAG", handler.result + + @staticmethod + def test_remote_xxe_manually_enabled(): + global hit_xxe + hit_xxe = False + + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(remote_xxe)) + assert handler.result == ["ok"], handler.result + assert hit_xxe == True + + @staticmethod + def test_dtd_disabled_by_default(): + global hit_dtd + hit_dtd = False + + parser = xml.sax.make_parser() + 
parser.parse(StringIO(dtd_retrieval)) + assert hit_dtd == False + + @staticmethod + def test_dtd_manually_enabled(): + global hit_dtd + hit_dtd = False + + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(dtd_retrieval)) + assert hit_dtd == True + + +# ============================================================================== +import xml.etree.ElementTree + +class TestEtree: + + # always vuln to billion laughs, quadratic + @staticmethod + @expects_timeout + def test_billion_laughs_allowed_by_default(): + parser = xml.etree.ElementTree.XMLParser() + _root = xml.etree.ElementTree.fromstring(billion_laughs, parser=parser) + + @staticmethod + @expects_timeout + def test_quardratic_blowup_allowed_by_default(): + parser = xml.etree.ElementTree.XMLParser() + _root = xml.etree.ElementTree.fromstring(quadratic_blowup, parser=parser) + + @staticmethod + def test_ok_xml(): + parser = xml.etree.ElementTree.XMLParser() + root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser) + assert root.tag == "test" + assert root.text == "hello world" + + @staticmethod + def test_xxe_not_possible(): + parser = xml.etree.ElementTree.XMLParser() + try: + _root = xml.etree.ElementTree.fromstring(local_xxe, parser=parser) + assert False + except xml.etree.ElementTree.ParseError as e: + assert "undefined entity &xxe" in str(e) + + @staticmethod + def test_dtd_not_possible(): + global hit_dtd + hit_dtd = False + + parser = xml.etree.ElementTree.XMLParser() + _root = xml.etree.ElementTree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + +# ============================================================================== +import lxml.etree + +class TestLxml: + # see https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + @staticmethod + def test_billion_laughs_disabled_by_default(): + parser = lxml.etree.XMLParser() + try: + _root = lxml.etree.fromstring(billion_laughs, 
parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Detected an entity reference loop" in str(e) + + @staticmethod + def test_quardratic_blowup_disabled_by_default(): + parser = lxml.etree.XMLParser() + try: + _root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Detected an entity reference loop" in str(e) + + @staticmethod + @expects_timeout + def test_billion_laughs_manually_enabled(): + parser = lxml.etree.XMLParser(huge_tree=True) + root = lxml.etree.fromstring(billion_laughs, parser=parser) + + @staticmethod + @expects_timeout + def test_quadratic_blowup_manually_enabled(): + parser = lxml.etree.XMLParser(huge_tree=True) + try: + _root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Detected an entity reference loop" in str(e) + + @staticmethod + def test_ok_xml(): + parser = lxml.etree.XMLParser() + root = lxml.etree.fromstring(ok_xml, parser=parser) + assert root.tag == "test" + assert root.text == "hello world" + + @staticmethod + def test_local_xxe_enabled_by_default(): + parser = lxml.etree.XMLParser() + root = lxml.etree.fromstring(local_xxe, parser=parser) + assert root.tag == "test" + assert root.text == "SECRET_FLAG\n", root.text + + @staticmethod + def test_local_xxe_disabled(): + parser = lxml.etree.XMLParser(resolve_entities=False) + root = lxml.etree.fromstring(local_xxe, parser=parser) + assert root.tag == "test" + assert root.text == None + + @staticmethod + def test_remote_xxe_disabled_by_default(): + global hit_xxe + hit_xxe = False + + parser = lxml.etree.XMLParser() + try: + root = lxml.etree.fromstring(remote_xxe, parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Failure to process entity remote_xxe" in str(e) + assert hit_xxe == False + + @staticmethod + def test_remote_xxe_manually_enabled(): + global hit_xxe + hit_xxe = False + + 
parser = lxml.etree.XMLParser(no_network=False) + root = lxml.etree.fromstring(remote_xxe, parser=parser) + assert root.tag == "test" + assert root.text == "ok" + assert hit_xxe == True + + @staticmethod + def test_dtd_disabled_by_default(): + global hit_dtd + hit_dtd = False + + parser = lxml.etree.XMLParser() + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + + @staticmethod + def test_dtd_manually_enabled(): + global hit_dtd + hit_dtd = False + + # Need to set BOTH load_dtd and no_network + parser = lxml.etree.XMLParser(load_dtd=True) + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + + parser = lxml.etree.XMLParser(no_network=False) + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + + parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == True + + hit_dtd = False + + # Setting dtd_validation also does not allow the remote access + parser = lxml.etree.XMLParser(dtd_validation=True, load_dtd=True) + try: + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + except lxml.etree.XMLSyntaxError: + pass + assert hit_dtd == False + + +# ============================================================================== + +import xmltodict + +class TestXmltodict: + @staticmethod + def test_billion_laughs_disabled_by_default(): + d = xmltodict.parse(billion_laughs) + assert d == {"lolz": None}, d + + @staticmethod + def test_quardratic_blowup_disabled_by_default(): + d = xmltodict.parse(quadratic_blowup) + assert d == {"foo": None}, d + + @staticmethod + @expects_timeout + def test_billion_laughs_manually_enabled(): + xmltodict.parse(billion_laughs, disable_entities=False) + + @staticmethod + @expects_timeout + def test_quardratic_blowup_manually_enabled(): + xmltodict.parse(quadratic_blowup, disable_entities=False) + + @staticmethod + def test_ok_xml(): + d = 
xmltodict.parse(ok_xml) + assert d == {"test": "hello world"}, d + + @staticmethod + def test_local_xxe_not_possible(): + d = xmltodict.parse(local_xxe) + assert d == {"test": None} + + d = xmltodict.parse(local_xxe, disable_entities=False) + assert d == {"test": None} + + @staticmethod + def test_remote_xxe_not_possible(): + global hit_xxe + hit_xxe = False + + d = xmltodict.parse(remote_xxe) + assert d == {"test": None} + assert hit_xxe == False + + d = xmltodict.parse(remote_xxe, disable_entities=False) + assert d == {"test": None} + assert hit_xxe == False + + @staticmethod + def test_dtd_not_possible(): + global hit_dtd + hit_dtd = False + + d = xmltodict.parse(dtd_retrieval) + assert hit_dtd == False diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag new file mode 100644 index 000000000000..45c9436ee9f2 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag @@ -0,0 +1 @@ +SECRET_FLAG From 3c321dd98dcd62193844f61c03eaa81ca5d4ee43 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 13:49:17 +0100 Subject: [PATCH 10/44] Python: Model `lxml.etree.get_default_parser` in own class --- .../semmle/python/frameworks/Xml.qll | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 6f865e13cdb1..4ecd2d8a99e9 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -182,26 +182,35 @@ private module Xml { } /** - * Gets a call to: - * * `lxml.etree.XMLParser` - * * `lxml.etree.get_default_parser` - * - * Given the following example: + * A call to `lxml.etree.get_default_parser`. 
* - * ```py - * lxml.etree.XMLParser() - * ``` + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser + */ + private class LXMLDefaultParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { + LXMLDefaultParser() { + this = API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() + } + + override DataFlow::Node getAnInput() { none() } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // as highlighted by + // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + // by default XXE is allow. so as long as the default parser has not been + // overridden, the result is also vuln to XXE. + kind.isXxe() + // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`. + } + } + + /** + * A call to `lxml.etree.XMLParser`. * - * * `this` would be `lxml.etree.XMLParser(resolve_entities=False)`. - * * `vulnerable(kind)`'s `kind` would be `XXE` + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser */ private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { LXMLParser() { - this = - API::moduleImport("lxml") - .getMember("etree") - .getMember(["XMLParser", "get_default_parser"]) - .getACall() + this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() } override DataFlow::Node getAnInput() { none() } From 124c03c15c9df8b14d143d3e38a410d3f60cb8e3 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 14:38:41 +0100 Subject: [PATCH 11/44] Python: Expand lxml tests And add annotations, see PoC.py for reference Some of these needs fixing though --- .../CWE-611/XmlEntityInjection.expected | 37 +++++++++++-------- .../Security/CWE-611/lxml_etree.py | 32 +++++++++------- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected 
b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 86edcb89d4da..b29a6d049db1 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -23,12 +23,15 @@ edges | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | -| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | -| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | -| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | -| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | -| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | -| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | +| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | +| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | +| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | +| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | +| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | 
lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | +| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | +| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | +| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | +| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | @@ -126,14 +129,18 @@ nodes | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:73:19:73:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:73:19:73:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:73:19:73:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:76:34:76:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:81:19:81:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:81:19:81:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:81:19:81:45 | ControlFlowNode for Subscript | semmle.label | 
ControlFlowNode for Subscript | -| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | | xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | @@ -228,7 +235,7 @@ subpaths | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML 
input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:84:34:84:44 | ControlFlowNode for xml_content | This | lxml_etree.py:81:19:81:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. 
| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py index 231116c2b720..2c2712098503 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py @@ -10,25 +10,25 @@ def lxml_etree_fromstring(): xml_content = request.args['xml_content'] - return lxml.etree.fromstring(xml_content).text + return lxml.etree.fromstring(xml_content).text # NOT OK for XXE @app.route("/lxml_etree_fromstringlist") def lxml_etree_fromstringlist(): xml_content = request.args['xml_content'] - return lxml.etree.fromstringlist([xml_content]).text + return lxml.etree.fromstringlist([xml_content]).text # NOT OK for XXE @app.route("/lxml_etree_XML") def lxml_etree_XML(): xml_content = request.args['xml_content'] - return lxml.etree.XML(xml_content).text + return lxml.etree.XML(xml_content).text # NOT OK for XXE @app.route("/lxml_etree_parse") def lxml_etree_parse(): xml_content = request.args['xml_content'] - return lxml.etree.parse(StringIO(xml_content)).getroot().text + return lxml.etree.parse(StringIO(xml_content)).getroot().text # NOT OK for XXE # With parsers - Default @@ -37,14 +37,14 @@ def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser() - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE @app.route("/lxml_etree_fromstring-lxml.etree.get_default_parser") def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.get_default_parser() - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE # With parsers - 
With options @@ -54,7 +54,7 @@ def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser(resolve_entities=False) - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE # XXE-vuln @app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") @@ -62,23 +62,29 @@ def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser(resolve_entities=True) - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE # Billion laughs and quadratic blowup (huge_tree) -## Good (huge_tree=True but resolve_entities=False) - @app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE, NOT OK for billion laughs/quadratic -## Bad @app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") def lxml_parser(): xml_content = request.args['xml_content'] parser = lxml.etree.XMLParser(huge_tree=True) - return lxml.etree.fromstring(xml_content, parser=parser).text + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE, NOT OK for billion laughs/quadratic + +# DTD retrival + +@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") +def lxml_parser(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser(resolve_entities=False, load_dtd=True, no_network=False) + return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for DTD, OK for rest From e295399f7096f92592ea7aa4d1286619bf39f8d0 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 14:43:37 +0100 Subject: [PATCH 12/44] Python: Properly handle `huge_tree` in lxml --- 
python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 5 +---- .../query-tests/Security/CWE-611/XmlEntityInjection.expected | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 4ecd2d8a99e9..58b7edc327be 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -226,10 +226,7 @@ private module Xml { ) or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - ( - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and - not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False f) - ) + this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index b29a6d049db1..fc6f8c9ad899 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -235,6 +235,7 @@ subpaths | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | This | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | From 703e3e8a0f9f81df20a924b25412baf4b9189086 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 14:46:48 +0100 Subject: [PATCH 13/44] Python: Handle DTD retrieval vuln in lxml --- python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 4 ++++ .../query-tests/Security/CWE-611/XmlEntityInjection.expected | 1 + 2 files changed, 5 insertions(+) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 58b7edc327be..315199e748c1 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -227,6 +227,10 @@ private module Xml { or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + or + kind.isDtdRetrieval() and + this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and + this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index fc6f8c9ad899..3c5ad70b23b9 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ 
b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -237,6 +237,7 @@ subpaths | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | This | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | user-provided value | | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value | +| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: DTD retrieval. 
| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | This | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | From 61291936bfcb2667647f330ca0a512b33c80e82c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 15:06:55 +0100 Subject: [PATCH 14/44] Python: Properly model `xml.etree` --- .../ql/src/experimental/semmle/python/frameworks/Xml.qll | 7 ++++++- .../Security/CWE-611/XmlEntityInjection.expected | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 315199e748c1..5140915e0792 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -24,7 +24,9 @@ private module Xml { override DataFlow::Node getAnInput() { none() } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { none() } + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } } /** @@ -58,6 +60,9 @@ private module Xml { override DataFlow::Node getAnInput() { result = this.getArg(0) } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + not exists(this.getArgByName("parser")) and + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or exists(XML::XMLParser xmlParser | xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index 3c5ad70b23b9..bf43d01cec1e 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -242,6 +242,11 @@ subpaths | xml_dom.py:21:40:21:50 | ControlFlowNode for 
xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | This | xml_etree.py:13:19:13:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | This | xml_etree.py:19:19:19:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | This | xml_etree.py:25:19:25:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | This | xml_etree.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | This | xml_etree.py:39:19:39:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value | From 3affa6cf3abd3e88fed8722f959b0b5851936809 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 15:08:56 +0100 Subject: [PATCH 15/44] Python: Annotate xmltodict tests --- .../experimental/query-tests/Security/CWE-611/xml_to_dict.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py index 2b91a22e1a22..8f43d2e1cc1f 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py @@ -8,10 +8,10 @@ def xmltodict_parse(): xml_content = request.args['xml_content'] - return xmltodict.parse(xml_content) + return xmltodict.parse(xml_content) # OK @app.route("/xmltodict.parse2") def xmltodict_parse2(): xml_content = request.args['xml_content'] - return xmltodict.parse(xml_content, 
disable_entities=False) \ No newline at end of file + return xmltodict.parse(xml_content, disable_entities=False) # NOT OK for billion laughs/quadratic From c4d08db62aafec4a020f4836a4bcb86329cc517b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 17:30:16 +0100 Subject: [PATCH 16/44] Python: Expand XML PoC with minidom/pulldom/expat --- .../Security/CWE-611/dont_extract/PoC.py | 201 ++++++++++++++++++ 1 file changed, 201 insertions(+) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py index 85301c32bff6..862346de3e8a 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py @@ -143,6 +143,7 @@ def fast_func(): # ============================================================================== import xml.sax +import xml.sax.handler class SimpleHandler(xml.sax.ContentHandler): def __init__(self): @@ -447,3 +448,203 @@ def test_dtd_not_possible(): d = xmltodict.parse(dtd_retrieval) assert hit_dtd == False + +# ============================================================================== +import xml.dom.minidom + +class TestMinidom: + @staticmethod + @expects_timeout + def test_billion_laughs(): + xml.dom.minidom.parseString(billion_laughs) + + @staticmethod + @expects_timeout + def test_quardratic_blowup(): + xml.dom.minidom.parseString(quadratic_blowup) + + @staticmethod + def test_ok_xml(): + doc = xml.dom.minidom.parseString(ok_xml) + assert doc.documentElement.tagName == "test" + assert doc.documentElement.childNodes[0].data == "hello world" + + @staticmethod + def test_xxe(): + # disabled by default + doc = xml.dom.minidom.parseString(local_xxe) + assert doc.documentElement.tagName == "test" + assert doc.documentElement.childNodes == [] + + # but can be turned on + parser = xml.sax.make_parser() + 
parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.minidom.parseString(local_xxe, parser=parser) + assert doc.documentElement.tagName == "test" + assert doc.documentElement.childNodes[0].data == "SECRET_FLAG" + + # which also works remotely + global hit_xxe + hit_xxe = False + + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + _doc = xml.dom.minidom.parseString(remote_xxe, parser=parser) + assert hit_xxe == True + + @staticmethod + def test_dtd(): + # not possible by default + global hit_dtd + hit_dtd = False + + _doc = xml.dom.minidom.parseString(dtd_retrieval) + assert hit_dtd == False + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + _doc = xml.dom.minidom.parseString(dtd_retrieval, parser=parser) + assert hit_dtd == True + +# ============================================================================== +import xml.dom.pulldom + +class TestPulldom: + @staticmethod + @expects_timeout + def test_billion_laughs(): + doc = xml.dom.pulldom.parseString(billion_laughs) + # you NEED to iterate over the items for it to take long + for event, node in doc: + pass + + @staticmethod + @expects_timeout + def test_quardratic_blowup(): + doc = xml.dom.pulldom.parseString(quadratic_blowup) + for event, node in doc: + pass + + @staticmethod + def test_ok_xml(): + doc = xml.dom.pulldom.parseString(ok_xml) + for event, node in doc: + if event == xml.dom.pulldom.START_ELEMENT: + assert node.tagName == "test" + elif event == xml.dom.pulldom.CHARACTERS: + assert node.data == "hello world" + + @staticmethod + def test_xxe(): + # disabled by default + doc = xml.dom.pulldom.parseString(local_xxe) + found_flag = False + for event, node in doc: + if event == xml.dom.pulldom.START_ELEMENT: + assert node.tagName == "test" + elif event == xml.dom.pulldom.CHARACTERS: + if node.data == "SECRET_FLAG": + found_flag = True + assert found_flag == 
False + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.pulldom.parseString(local_xxe, parser=parser) + found_flag = False + for event, node in doc: + if event == xml.dom.pulldom.START_ELEMENT: + assert node.tagName == "test" + elif event == xml.dom.pulldom.CHARACTERS: + if node.data == "SECRET_FLAG": + found_flag = True + assert found_flag == True + + # which also works remotely + global hit_xxe + hit_xxe = False + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.pulldom.parseString(remote_xxe, parser=parser) + assert hit_xxe == False + for event, node in doc: + pass + assert hit_xxe == True + + @staticmethod + def test_dtd(): + # not possible by default + global hit_dtd + hit_dtd = False + + doc = xml.dom.pulldom.parseString(dtd_retrieval) + for event, node in doc: + pass + assert hit_dtd == False + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.pulldom.parseString(dtd_retrieval, parser=parser) + for event, node in doc: + pass + assert hit_dtd == True + +# ============================================================================== +import xml.parsers.expat + +class TestExpat: + # this is the underlying parser implementation used by the rest of the Python + # standard library. But people are probably not using this directly. 
+ + @staticmethod + @expects_timeout + def test_billion_laughs(): + parser = xml.parsers.expat.ParserCreate() + parser.Parse(billion_laughs, True) + + @staticmethod + @expects_timeout + def test_quardratic_blowup(): + parser = xml.parsers.expat.ParserCreate() + parser.Parse(quadratic_blowup, True) + + @staticmethod + def test_ok_xml(): + char_data_recv = [] + def char_data_handler(data): + char_data_recv.append(data) + + parser = xml.parsers.expat.ParserCreate() + parser.CharacterDataHandler = char_data_handler + parser.Parse(ok_xml, True) + + assert char_data_recv == ["hello world"] + + @staticmethod + def test_xxe(): + # not vuln by default + char_data_recv = [] + def char_data_handler(data): + char_data_recv.append(data) + + parser = xml.parsers.expat.ParserCreate() + parser.CharacterDataHandler = char_data_handler + parser.Parse(local_xxe, True) + + assert char_data_recv == [] + + # there might be ways to make it vuln, but I did not investigate further. + + @staticmethod + def test_dtd(): + # not vuln by default + global hit_dtd + hit_dtd = False + + parser = xml.parsers.expat.ParserCreate() + parser.Parse(dtd_retrieval, True) + assert hit_dtd == False + + # there might be ways to make it vuln, but I did not investigate further.
From 5a652480b1f26eba9ed9c82e338c5058330bf51a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 17:37:25 +0100 Subject: [PATCH 17/44] Python: Annotate xml.dom tests --- .../query-tests/Security/CWE-611/xml_dom.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py index 428a2d645a1b..55762ece0be5 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py @@ -12,25 +12,25 @@ def xml_minidom_parse(): xml_content = request.args['xml_content'] - return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes + return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic @app.route("/xml_minidom_parseString") def xml_minidom_parseString(): xml_content = request.args['xml_content'] - return xml.dom.minidom.parseString(xml_content).documentElement.childNodes + return xml.dom.minidom.parseString(xml_content).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic @app.route("/xml_pulldom_parse") def xml_pulldom_parse(): xml_content = request.args['xml_content'] - return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes + return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic @app.route("/xml_pulldom_parseString") def xml_pulldom_parseString(): xml_content = request.args['xml_content'] - return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes + return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic # With parsers 
@@ -40,5 +40,4 @@ def xml_minidom_parse_xml_sax_make_parser(): parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) - return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes - + return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic From 9406a972cdbf24ab8c0e5608490042ffc12b297f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 17:52:11 +0100 Subject: [PATCH 18/44] Python: Fix vuln detection for xml.minidom with parser arg --- .../semmle/python/frameworks/Xml.qll | 28 +++++++------------ .../CWE-611/XmlEntityInjection.expected | 2 ++ 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 5140915e0792..d7e27f35b0d9 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -302,19 +302,9 @@ private module Xml { } /** - * Gets a call to: - * * `xml.dom.minidom.parse` - * * `xml.dom.pulldom.parse` - * - * Given the following example: + * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. * - * ```py - * xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNode - * ``` - * - * * `this` would be `xml.dom.minidom.parse(StringIO(xml_content), parser=parser)`. - * * `getAnInput()`'s result would be `StringIO(xml_content)`. - * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. + * Both of these modules are based on SAX parsers. 
*/ private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLDomParsing() { @@ -326,15 +316,17 @@ private module Xml { .getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { + result in [this.getArg(0), this.getArgByName("string"), this.getArgByName("file")] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - exists(XML::XMLParser xmlParser | - xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) - ) + this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - not exists(this.getArgByName("parser")) + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index bf43d01cec1e..b08e7dd727e4 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -242,6 +242,7 @@ subpaths | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | This | xml_dom.py:39:19:39:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | This | xml_etree.py:13:19:13:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | This | xml_etree.py:19:19:19:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | This | xml_etree.py:25:19:25:25 | ControlFlowNode for request | user-provided value | @@ -252,6 +253,7 @@ subpaths | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | From 7cda901da21b814d96c326d8499d6d9b2ca3de9f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 19:35:33 +0100 Subject: [PATCH 19/44] Python: Add separate query for SimpleXMLRPCServer This was a rough quick-n-dirty query, and should get some qhelp as well at some point. --- .../Security/CWE-611/SimpleXmlRpcServer.ql | 27 +++++++++++++++++ .../semmle/python/frameworks/Xml.qll | 30 ------------------- .../CWE-611/SimpleXmlRpcServer.expected | 1 + .../Security/CWE-611/SimpleXmlRpcServer.qlref | 1 + .../Security/CWE-611/xmlrpc_server.py | 8 +++-- 5 files changed, 34 insertions(+), 33 deletions(-) create mode 100644 python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql new file mode 100644 index 000000000000..0e3deebf6016 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -0,0 +1,27 @@ +/** + * @name SimpleXMLRPCServer DoS vulnerability + * @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user 
input + * @kind path-problem + * @problem.severity warning + * @precision high + * @id py/simple-xml-rpc-server + * @tags security + * external/cwe/cwe-776 + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts +private import experimental.semmle.python.Concepts +private import semmle.python.ApiGraphs + +from DataFlow::CallCfgNode call, string kinds +where + call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and + kinds = + strictconcat(XML::XMLVulnerabilityKind kind | + kind.isBillionLaughs() or kind.isQuadraticBlowup() + | + kind, ", " + ) +select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "." diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index d7e27f35b0d9..bf481a1f2a34 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -329,34 +329,4 @@ private module Xml { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) } } - - /** - * Gets a call to `xmlrpc.server.SimpleXMLRPCServer`. - * - * Given the following example: - * - * ```py - * server = SimpleXMLRPCServer(("127.0.0.1", 8000)) - * server.register_function(foo, "foo") - * server.serve_forever() - * ``` - * - * * `this` would be `SimpleXMLRPCServer(("127.0.0.1", 8000))`. - * * `getAnInput()`'s result would be `foo`. - * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. 
- */ - private class XMLRPCServer extends DataFlow::CallCfgNode, XML::XMLParser::Range { - XMLRPCServer() { - this = - API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() - } - - override DataFlow::Node getAnInput() { - result = this.getAMethodCall("register_function").getArg(0) - } - - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - kind.isBillionLaughs() or kind.isQuadraticBlowup() - } - } } diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected new file mode 100644 index 000000000000..4a08d61c47af --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected @@ -0,0 +1 @@ +| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to: Billion Laughs, Quadratic Blowup. | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref new file mode 100644 index 000000000000..a0b30e6d69b8 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref @@ -0,0 +1 @@ +experimental/Security/CWE-611/SimpleXmlRpcServer.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py index baa433c4a8ab..83c18b549b3d 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py @@ -1,10 +1,12 @@ from xmlrpc.server import SimpleXMLRPCServer -def foo(n): - return n +def foo(n: str): + print("foo called with arg:", n, type(n)) + return "ok" server = SimpleXMLRPCServer(("127.0.0.1", 8000)) server.register_function(foo, "foo") server.serve_forever() -# 
billion_laughs -> curl 127.0.0.1:8000 --data-raw ']>foo&lol9;' +# normal: curl 127.0.0.1:8000 --data-raw 'foo42' +# billion_laughs: curl 127.0.0.1:8000 --data-raw ']>foo&lol9;' From 4b03f5c72400106027dd034a95079573904c1a12 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 19:38:31 +0100 Subject: [PATCH 20/44] Python: Rename xml.sax test for consistency --- .../CWE-611/XmlEntityInjection.expected | 102 +++++++++--------- .../{xml_sax_make_parser.py => xml_sax.py} | 0 2 files changed, 51 insertions(+), 51 deletions(-) rename python/ql/test/experimental/query-tests/Security/CWE-611/{xml_sax_make_parser.py => xml_sax.py} (100%) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index b08e7dd727e4..f5f85bf178b8 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -71,25 +71,25 @@ edges | xml_etree.py:60:19:60:25 | ControlFlowNode for request | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | -| xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for 
Attribute | -| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | -| 
xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | +| xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | +| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | +| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | +| xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | +| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | +| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | +| xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | +| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | +| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | +| xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | +| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | +| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | +| xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | +| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | +| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | +| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:93:22:93:42 | ControlFlowNode for 
StringIO() | +| xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | +| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | +| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | | xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | @@ -193,31 +193,31 @@ nodes | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | 
ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax_make_parser.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax_make_parser.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for 
StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| 
xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | +| xml_sax.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | | xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | @@ -250,11 +250,11 @@ subpaths | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | This | xml_etree.py:39:19:39:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:75:19:75:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax.py:31:19:31:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax.py:42:19:42:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax.py:63:19:63:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax.py:75:19:75:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value | +| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax.py:98:19:98:25 | ControlFlowNode for request | user-provided value | | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py rename to python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py From faebaee141c10ace600153d84a2d8d1952beb73a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:36:16 +0100 Subject: [PATCH 21/44] Python: Use concept tests for XML Parsing I was losing my mind from looking through those .expected files Just going to take it one file at a time, to make reviewing easier --- .../XML/ExperimentalXmlConceptsTests.expected | 0 .../XML/ExperimentalXmlConceptsTests.ql | 33 +++++++ .../frameworks/XML/lxml_etree.py | 40 +++++++++ .../Security/CWE-611/lxml_etree.py | 90 ------------------- 4 files changed, 73 insertions(+), 90 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql new file mode 100644 index
000000000000..8ca33765d64f --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql @@ -0,0 +1,33 @@ +import python +import experimental.semmle.python.Concepts +import experimental.semmle.python.frameworks.Xml +import semmle.python.dataflow.new.DataFlow +import TestUtilities.InlineExpectationsTest +private import semmle.python.dataflow.new.internal.PrintNode + +class XmlParsingTest extends InlineExpectationsTest { + XmlParsingTest() { this = "XmlParsingTest" } + + override string getARelevantTag() { result in ["input", "vuln"] } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(XML::XMLParsing parsing | + exists(DataFlow::Node input | + input = parsing.getAnInput() and + location = input.getLocation() and + element = input.toString() and + value = prettyNodeForInlineTest(input) and + tag = "input" + ) + or + exists(XML::XMLVulnerabilityKind kind | + parsing.vulnerable(kind) and + location = parsing.getLocation() and + element = parsing.toString() and + value = "'" + kind + "'" and + tag = "vuln" + ) + ) + } +} diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py new file mode 100644 index 000000000000..64fa51175837 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -0,0 +1,40 @@ +from io import StringIO +import lxml.etree + +x = "some xml" + +# different parsing methods +lxml.etree.fromstring(x) # $ input=x vuln='XXE' + +lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE' + +lxml.etree.XML(x) # $ input=x vuln='XXE' + +lxml.etree.parse(StringIO(x)).getroot() # $ input=StringIO(..) 
vuln='XXE' + +# With default parsers (nothing changed) +parser = lxml.etree.XMLParser() +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' + +parser = lxml.etree.get_default_parser() +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' + +# XXE-safe +parser = lxml.etree.XMLParser(resolve_entities=False) +lxml.etree.fromstring(x, parser=parser) # $ input=x + +# XXE-vuln +parser = lxml.etree.XMLParser(resolve_entities=True) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' + +# Billion laughs vuln (also XXE) +parser = lxml.etree.XMLParser(huge_tree=True) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE' + +# Billion laughs, but not XXE +parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +# DTD retrival vuln (also XXE) +parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='DTD retrieval' vuln='XXE' diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py deleted file mode 100644 index 2c2712098503..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py +++ /dev/null @@ -1,90 +0,0 @@ -from flask import request, Flask -from io import StringIO, BytesIO -import lxml.etree - -app = Flask(__name__) - -# Parsing - -@app.route("/lxml_etree_fromstring") -def lxml_etree_fromstring(): - xml_content = request.args['xml_content'] - - return lxml.etree.fromstring(xml_content).text # NOT OK for XXE - -@app.route("/lxml_etree_fromstringlist") -def lxml_etree_fromstringlist(): - xml_content = request.args['xml_content'] - - return lxml.etree.fromstringlist([xml_content]).text # NOT OK for XXE - -@app.route("/lxml_etree_XML") -def lxml_etree_XML(): - xml_content = 
request.args['xml_content'] - - return lxml.etree.XML(xml_content).text # NOT OK for XXE - -@app.route("/lxml_etree_parse") -def lxml_etree_parse(): - xml_content = request.args['xml_content'] - - return lxml.etree.parse(StringIO(xml_content)).getroot().text # NOT OK for XXE - -# With parsers - Default - -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE - -@app.route("/lxml_etree_fromstring-lxml.etree.get_default_parser") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.get_default_parser() - return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE - -# With parsers - With options - -# XXE-safe -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=False) - return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE - -# XXE-vuln -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=True) - return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for XXE - -# Billion laughs and quadratic blowup (huge_tree) - -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) - return lxml.etree.fromstring(xml_content, parser=parser).text # OK for XXE, NOT OK for billion laughs/quadratic - -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(huge_tree=True) - return lxml.etree.fromstring(xml_content, parser=parser).text # 
NOT OK for XXE, NOT OK for billion laughs/quadratic - -# DTD retrival - -@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+") -def lxml_parser(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=False, load_dtd=True, no_network=False) - return lxml.etree.fromstring(xml_content, parser=parser).text # NOT OK for DTD, OK for rest From a7134cac2eb339c76f3f299c77b927742e5e0320 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:39:56 +0100 Subject: [PATCH 22/44] Python: Port xml.dom tests --- .../library-tests/frameworks/XML/xml_dom.py | 19 ++++++++ .../query-tests/Security/CWE-611/xml_dom.py | 43 ------------------- 2 files changed, 19 insertions(+), 43 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py new file mode 100644 index 000000000000..ade6ece910d8 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py @@ -0,0 +1,19 @@ +from io import StringIO +import xml.dom.minidom +import xml.dom.pulldom +import xml.sax + +x = "some xml" + +# minidom +xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +# pulldom +xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py deleted file mode 100644 index 55762ece0be5..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py +++ /dev/null @@ -1,43 +0,0 @@ -from flask import request, Flask -from io import StringIO, BytesIO -import xml.dom.minidom -import xml.dom.pulldom -import xml.sax - -app = Flask(__name__) - -# Parsing - -@app.route("/xml_minidom_parse") -def xml_minidom_parse(): - xml_content = request.args['xml_content'] - - return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic - -@app.route("/xml_minidom_parseString") -def xml_minidom_parseString(): - xml_content = request.args['xml_content'] - - return xml.dom.minidom.parseString(xml_content).documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic - -@app.route("/xml_pulldom_parse") -def xml_pulldom_parse(): - xml_content = request.args['xml_content'] - - return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for billion laughs/quadratic - -@app.route("/xml_pulldom_parseString") -def xml_pulldom_parseString(): - xml_content = request.args['xml_content'] - - return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes # OK for XXE/DTD, NOT OK for 
billion laughs/quadratic - -# With parsers - -@app.route("/xml_minidom_parse_xml_sax_make_parser") -def xml_minidom_parse_xml_sax_make_parser(): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic From 5fb4c4d1524f8a6bae5a8a3ff1c40b35b66f0998 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:50:45 +0100 Subject: [PATCH 23/44] Python: Port xml.etree tests --- .../library-tests/frameworks/XML/xml_etree.py | 19 ++++++ .../Security/CWE-611/dont_extract/PoC.py | 17 +++++ .../query-tests/Security/CWE-611/xml_etree.py | 64 ------------------- 3 files changed, 36 insertions(+), 64 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py new file mode 100644 index 000000000000..e2b81b3ad529 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -0,0 +1,19 @@ +from io import StringIO +import xml.etree.ElementTree + +x = "some xml" + +# Parsing in different ways +xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstringlist(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' + +# With parsers (no options available to disable/enable security features) +parser = xml.etree.ElementTree.XMLParser() +xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +# note: it's technically possible to use the thing wrapper func `fromstring` with an +# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it +# seems very unlikely that anyone would do this, so we have intentionally not added any +# tests for this. diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py index 862346de3e8a..b38ff9889e9d 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py @@ -250,6 +250,23 @@ def test_ok_xml(): assert root.tag == "test" assert root.text == "hello world" + @staticmethod + def test_ok_xml_sax_parser(): + # you _can_ pass a SAX parser to xml.etree... but it doesn't give you the output :| + parser = xml.sax.make_parser() + root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser) + assert root == None + + @staticmethod + def test_ok_xml_lxml_parser(): + # this is technically possible, since parsers follow the same API, and the + # `fromstring` function is just a thin wrapper... 
seems very unlikely that + # anyone would do this though :| + parser = lxml.etree.XMLParser() + root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser) + assert root.tag == "test" + assert root.text == "hello world" + @staticmethod def test_xxe_not_possible(): parser = xml.etree.ElementTree.XMLParser() diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py deleted file mode 100644 index b9c980045e2a..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py +++ /dev/null @@ -1,64 +0,0 @@ -from flask import request, Flask -from io import StringIO, BytesIO -import xml.etree -import xml.etree.ElementTree -import lxml.etree - -app = Flask(__name__) - -# Parsing - -@app.route("/xml_etree_fromstring") -def xml_etree_fromstring(): - xml_content = request.args['xml_content'] - - return xml.etree.ElementTree.fromstring(xml_content).text - -@app.route("/xml_etree_fromstringlist") -def xml_etree_fromstringlist(): - xml_content = request.args['xml_content'] - - return xml.etree.ElementTree.fromstringlist(xml_content).text - -@app.route("/xml_etree_XML") -def xml_etree_XML(): - xml_content = request.args['xml_content'] - - return xml.etree.ElementTree.XML(xml_content).text - -@app.route("/xml_etree_parse") -def xml_etree_parse(): - xml_content = request.args['xml_content'] - - return xml.etree.ElementTree.parse(StringIO(xml_content)).getroot().text - -# With parsers - -@app.route("/xml_etree_fromstring-xml_etree_XMLParser") -def xml_parser_1(): - xml_content = request.args['xml_content'] - - parser = xml.etree.ElementTree.XMLParser() - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - -@app.route("/xml_etree_fromstring-lxml_etree_XMLParser") -def xml_parser_2(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - 
-@app.route("/xml_etree_fromstring-lxml_get_default_parser") -def xml_parser_3(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.get_default_parser() - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - -@app.route("/xml_etree_fromstring-lxml_get_default_parser") -def xml_parser_4(): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text \ No newline at end of file From 0b12d918171ee00b8a40f576d75c65b38193ebf0 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:57:04 +0100 Subject: [PATCH 24/44] Python: Port xml.sax tests --- .../library-tests/frameworks/XML/xml_sax.py | 47 ++++++++ .../query-tests/Security/CWE-611/xml_sax.py | 104 ------------------ 2 files changed, 47 insertions(+), 104 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py new file mode 100644 index 000000000000..47f6600b153e --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -0,0 +1,47 @@ +from io import StringIO +import xml.sax + +x = "some xml" + +class MainHandler(xml.sax.ContentHandler): + def __init__(self): + self._result = [] + + def characters(self, data): + self._result.append(data) + + def parse(self, f): + xml.sax.parse(f, self) # $ MISSING: input=f vuln='Billion Laughs' vuln='Quadratic Blowup' + self._result + +MainHandler().parse(StringIO(x)) + +parser = xml.sax.make_parser() +parser.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' + +# You can make it vuln to both XXE and DTD retrieval by setting this flag +# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, False) +parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +# Forward Type Tracking test + +def func(cond): + parser = xml.sax.make_parser() + if cond: + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + else: + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +# make it vuln, then making it safe +# a bit of an edge-case, but is nice to be able to handle. +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +parser.setFeature(xml.sax.handler.feature_external_ges, False) +parser.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py deleted file mode 100644 index e95abf753796..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax.py +++ /dev/null @@ -1,104 +0,0 @@ -from flask import request, Flask -from io import StringIO -import xml.sax - -app = Flask(__name__) - - -class MainHandler(xml.sax.ContentHandler): - def __init__(self): - self._result = [] - - def characters(self, data): - self._result.append(data) - - def parse(self, f): - xml.sax.parse(f, self) # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return self._result - -# GOOD - - -@app.route("/MainHandler") -def mainHandler(): - xml_content = request.args['xml_content'] - - return MainHandler().parse(StringIO(xml_content)) - - -@app.route("/xml.sax.make_parser()+MainHandler") -def xml_makeparser_MainHandler(): - xml_content = request.args['xml_content'] - - GoodHandler = MainHandler() - parser = xml.sax.make_parser() - parser.setContentHandler(GoodHandler) - parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return GoodHandler._result - - -@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_False") -def xml_makeparser_MainHandler_entitiesFalse(): - xml_content = request.args['xml_content'] - - GoodHandler = MainHandler() - parser = xml.sax.make_parser() - parser.setContentHandler(GoodHandler) - # https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges - parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(xml_content)) # # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return GoodHandler._result - -@app.route("not-user-controlled") -def not_user_controlled(): - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - 
parser.parse("/not-user-controlled/default_config.xml") # OK - return - -# BAD - -@app.route("/xml.sax.make_parser()+MainHandler-xml.sax.handler.feature_external_ges_True") -def xml_makeparser_MainHandler_entitiesTrue(): - xml_content = request.args['xml_content'] - - BadHandler = MainHandler() - parser = xml.sax.make_parser() - parser.setContentHandler(BadHandler) - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic - return BadHandler._result - - -@app.route("/xml.sax.make_parser()+xml.dom.minidom.parse-xml.sax.handler.feature_external_ges_True") -def xml_makeparser_minidom_entitiesTrue(): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - doc = xml.dom.minidom.parse(StringIO(xml_content), parser=parser) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic - return doc.documentElement.childNodes - -# Forward Type Tracking test - -@app.route("forward_tracking1") -def forward_tracking1(action): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - if action == 'load-config': - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(xml_content)) # NOT OK for XXE/DTD, NOT OK for billion laughs/quadratic - else: - parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return - -@app.route("forward_tracking2") -def forward_tracking2(action): - xml_content = request.args['xml_content'] - - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(xml_content)) # OK for XXE/DTD, NOT OK for billion laughs/quadratic - return From c739ae40b60ef5644d0c0e9c1a8238742f2207e2 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:59:00 
+0100 Subject: [PATCH 25/44] Python: Port `xmltodict` tests --- .../library-tests/frameworks/XML/xmltodict.py | 6 ++++++ .../query-tests/Security/CWE-611/xml_to_dict.py | 17 ----------------- 2 files changed, 6 insertions(+), 17 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py new file mode 100644 index 000000000000..ee0b38719125 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py @@ -0,0 +1,6 @@ +import xmltodict + +x = "some xml" + +xmltodict.parse(x) # $ input=x +xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py deleted file mode 100644 index 8f43d2e1cc1f..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py +++ /dev/null @@ -1,17 +0,0 @@ -from flask import request, Flask -from io import StringIO, BytesIO -import xmltodict - -app = Flask(__name__) - -@app.route("/xmltodict.parse") -def xmltodict_parse(): - xml_content = request.args['xml_content'] - - return xmltodict.parse(xml_content) # OK - -@app.route("/xmltodict.parse2") -def xmltodict_parse2(): - xml_content = request.args['xml_content'] - - return xmltodict.parse(xml_content, disable_entities=False) # NOT OK for billion laughs/quadratic From 2451123c6712e566fe0256f9349952e8ef738cd2 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 20:59:45 +0100 Subject: [PATCH 26/44] Python: Move XML PoC to new test dir --- .../dont_extract => library-tests/frameworks/XML/poc}/PoC.py | 0 .../dont_extract => 
library-tests/frameworks/XML/poc}/flag | 0 .../library-tests/frameworks/XML/poc/this-dir-is-not-extracted | 1 + 3 files changed, 1 insertion(+) rename python/ql/test/experimental/{query-tests/Security/CWE-611/dont_extract => library-tests/frameworks/XML/poc}/PoC.py (100%) rename python/ql/test/experimental/{query-tests/Security/CWE-611/dont_extract => library-tests/frameworks/XML/poc}/flag (100%) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/PoC.py rename to python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag b/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/dont_extract/flag rename to python/ql/test/experimental/library-tests/frameworks/XML/poc/flag diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted new file mode 100644 index 000000000000..b1925ade1d3a --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted @@ -0,0 +1 @@ +just FYI From 32787939721e9478e4075b6c7d2f10a96b2a2cb1 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:05:44 +0100 Subject: [PATCH 27/44] Python: Handle more functions and kw-args --- .../semmle/python/frameworks/Xml.qll | 81 ++++++++++++++++--- .../frameworks/XML/lxml_etree.py | 9 ++- .../library-tests/frameworks/XML/xml_dom.py | 12 +++ .../library-tests/frameworks/XML/xml_etree.py | 16 +++- 
.../library-tests/frameworks/XML/xml_sax.py | 10 +-- .../library-tests/frameworks/XML/xmltodict.py | 2 + 6 files changed, 114 insertions(+), 16 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index bf481a1f2a34..b0e7592c3936 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -53,11 +53,21 @@ private module Xml { API::moduleImport("xml") .getMember("etree") .getMember("ElementTree") - .getMember(["fromstring", "fromstringlist", "XML", "parse"]) + .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"]) .getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML / XMLID + this.getArgByName("text"), + // fromstringlist + this.getArgByName("sequence"), + // parse / iterparse + this.getArgByName("source"), + ] + } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { not exists(this.getArgByName("parser")) and @@ -163,8 +173,8 @@ private module Xml { * parsed_xml = BadHandler._result * ``` */ - private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - XMLSaxParsing() { + private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxInstanceParsing() { this = API::moduleImport("xml") .getMember("sax") @@ -174,7 +184,40 @@ private module Xml { .getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // always vuln to these + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or + // can be vuln to other things if features has been turned on + this.getObject() = 
saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + } + } + + /** + * A call to either `parse` or `parseString` from `xml.sax` module. + * + * See: + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString + */ + private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxParsing() { + this = + API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // parse + this.getArgByName("source"), + ] + } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { // always vuln to these @@ -262,11 +305,21 @@ private module Xml { this = API::moduleImport("lxml") .getMember("etree") - .getMember(["fromstring", "fromstringlist", "XML", "parse"]) + .getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"]) .getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML + this.getArgByName("text"), + // fromstringlist + this.getArgByName("strings"), + // parse / parseid + this.getArgByName("source"), + ] + } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { exists(XML::XMLParser xmlParser | @@ -293,7 +346,9 @@ private module Xml { private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { + result in [this.getArg(0), this.getArgByName("xml_input")] + } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and @@ -317,7 +372,15 @@ 
private module Xml { } override DataFlow::Node getAnInput() { - result in [this.getArg(0), this.getArgByName("string"), this.getArgByName("file")] + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // minidom.parse + this.getArgByName("file"), + // pulldom.parse + this.getArgByName("stream_or_string"), + ] } DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 64fa51175837..3e6e6fb08e7f 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -5,12 +5,19 @@ # different parsing methods lxml.etree.fromstring(x) # $ input=x vuln='XXE' +lxml.etree.fromstring(text=x) # $ input=x vuln='XXE' lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE' +lxml.etree.fromstringlist(strings=[x]) # $ input=List vuln='XXE' lxml.etree.XML(x) # $ input=x vuln='XXE' +lxml.etree.XML(text=x) # $ input=x vuln='XXE' -lxml.etree.parse(StringIO(x)).getroot() # $ input=StringIO(..) vuln='XXE' +lxml.etree.parse(StringIO(x)) # $ input=StringIO(..) vuln='XXE' +lxml.etree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' + +lxml.etree.parseid(StringIO(x)) # $ input=StringIO(..) vuln='XXE' +lxml.etree.parseid(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' # With default parsers (nothing changed) parser = lxml.etree.XMLParser() diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py index ade6ece910d8..7dce29fc7b9d 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py @@ -7,13 +7,25 @@ # minidom xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parse(file=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + # pulldom xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + # These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) +xml.dom.minidom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + +xml.dom.pulldom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' +xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index e2b81b3ad529..23ac3784cbc3 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -5,9 +5,23 @@ # Parsing in different ways xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.fromstringlist(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.fromstringlist([x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' + xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XML(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.XMLID(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XMLID(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.iterparse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' + # With parsers (no options available to disable/enable security features) parser = xml.etree.ElementTree.XMLParser() diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py index 47f6600b153e..89bbec3f1f57 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -10,14 +10,15 @@ def __init__(self): def characters(self, data): self._result.append(data) - def parse(self, f): - xml.sax.parse(f, self) # $ MISSING: input=f vuln='Billion Laughs' vuln='Quadratic Blowup' - self._result +xml.sax.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.sax.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -MainHandler().parse(StringIO(x)) +xml.sax.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.sax.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' parser = xml.sax.make_parser() parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' # You can make it vuln to both XXE and DTD retrieval by setting this flag # see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges @@ -30,7 +31,6 @@ def parse(self, f): parser.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' # Forward Type Tracking test - def func(cond): parser = xml.sax.make_parser() if cond: diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py index ee0b38719125..473e51c9fe66 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py @@ -3,4 +3,6 @@ x = "some xml" xmltodict.parse(x) # $ input=x +xmltodict.parse(xml_input=x) # $ input=x + xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' From f72f673e7ee82e5fd4156d2d6a5a4e8144d371d7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:09:29 +0100 Subject: [PATCH 28/44] Python: Update `XmlEntityInjection.expected` I had forgotten about this, but better late than never... also added a small representative test --- .../CWE-611/XmlEntityInjection.expected | 279 ++---------------- .../query-tests/Security/CWE-611/test.py | 30 ++ 2 files changed, 53 insertions(+), 256 deletions(-) create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/test.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected index f5f85bf178b8..25594b4ddaaf 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -1,260 +1,27 @@ edges -| lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | -| lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | -| lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | 
lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | -| lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | -| lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | -| lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | -| lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | -| lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | -| lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | -| lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | -| lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | -| lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | -| lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | -| lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | -| lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | -| lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | -| lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | -| lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | -| lxml_etree.py:54:19:54:25 | ControlFlowNode for request | lxml_etree.py:54:19:54:30 | 
ControlFlowNode for Attribute | -| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | -| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | -| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | -| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | -| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | -| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | -| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | -| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | -| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | -| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | -| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | -| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | -| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | -| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | -| xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | -| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | -| 
xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | -| xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | -| xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | -| xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | -| xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | -| xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | -| xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | -| xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | -| xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | -| xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | -| xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | -| xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | -| xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | -| xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | -| xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | -| xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | -| xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | 
-| xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | -| xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | -| xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | -| xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | -| xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | -| xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | -| xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | -| xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | -| xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | -| xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | -| xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | -| xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | -| xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | -| xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | -| xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | -| xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | -| xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | 
xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | -| xml_etree.py:60:19:60:25 | ControlFlowNode for request | xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | -| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | -| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | -| xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | -| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | -| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | -| xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | -| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | -| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | -| xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | -| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | -| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | -| xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | -| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | -| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | -| xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | -| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | 
xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | -| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | -| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | -| xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | -| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | -| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | -| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | -| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | -| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | -| xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | -| xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | -| xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | +| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute | +| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript | +| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content | +| test.py:13:19:13:25 | ControlFlowNode for request | test.py:13:19:13:30 | ControlFlowNode for Attribute | +| test.py:13:19:13:30 | ControlFlowNode for Attribute | test.py:13:19:13:45 | ControlFlowNode for Subscript | +| test.py:13:19:13:45 | ControlFlowNode for Subscript | 
test.py:15:34:15:44 | ControlFlowNode for xml_content | +| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content | nodes -| lxml_etree.py:11:19:11:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:11:19:11:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:11:19:11:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:17:19:17:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:17:19:17:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:17:19:17:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | semmle.label | ControlFlowNode for List | -| lxml_etree.py:23:19:23:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:23:19:23:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:23:19:23:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:29:19:29:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:29:19:29:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:29:19:29:45 | ControlFlowNode for Subscript | semmle.label | 
ControlFlowNode for Subscript | -| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| lxml_etree.py:37:19:37:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:37:19:37:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:37:19:37:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:44:19:44:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:44:19:44:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:44:19:44:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:54:19:54:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:54:19:54:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:54:19:54:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:57:34:57:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:62:19:62:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:62:19:62:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:62:19:62:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:71:19:71:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for 
request | -| lxml_etree.py:71:19:71:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:71:19:71:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:78:19:78:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:78:19:78:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:78:19:78:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| lxml_etree.py:87:19:87:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| lxml_etree.py:87:19:87:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| lxml_etree.py:87:19:87:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_dom.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_dom.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:21:40:21:50 | 
ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_dom.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_dom.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_dom.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_dom.py:39:19:39:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_dom.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_etree.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode 
for Attribute | -| xml_etree.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:25:19:25:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:25:19:25:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:25:19:25:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_etree.py:39:19:39:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:39:19:39:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:39:19:39:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:46:19:46:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:46:19:46:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:46:19:46:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| 
xml_etree.py:53:19:53:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:53:19:53:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:53:19:53:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_etree.py:60:19:60:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_etree.py:60:19:60:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_etree.py:60:19:60:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_etree.py:64:45:64:55 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| xml_sax.py:31:19:31:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:31:19:31:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:31:19:31:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:42:19:42:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:42:19:42:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:42:19:42:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:63:19:63:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:63:19:63:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:63:19:63:45 | ControlFlowNode for Subscript | 
semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:75:19:75:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:75:19:75:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:75:19:75:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:86:19:86:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:86:19:86:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:86:19:86:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_sax.py:98:19:98:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_sax.py:98:19:98:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_sax.py:98:19:98:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | semmle.label | ControlFlowNode for StringIO() | -| xml_to_dict.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_to_dict.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_to_dict.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_to_dict.py:11:28:11:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| 
xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| xml_to_dict.py:15:19:15:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| xml_to_dict.py:15:19:15:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:15:34:15:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:13:34:13:44 | ControlFlowNode for xml_content | This | lxml_etree.py:11:19:11:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:19:38:19:50 | ControlFlowNode for List | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:19:38:19:50 | ControlFlowNode for List | This | lxml_etree.py:17:19:17:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:25:27:25:37 | ControlFlowNode for xml_content | This | lxml_etree.py:23:19:23:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:31:29:31:49 | ControlFlowNode for StringIO() | This | lxml_etree.py:29:19:29:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:40:34:40:44 | ControlFlowNode for xml_content | This | lxml_etree.py:37:19:37:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| lxml_etree.py:47:34:47:44 | ControlFlowNode for xml_content | This | lxml_etree.py:44:19:44:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | lxml_etree.py:65:34:65:44 | ControlFlowNode for xml_content | This | lxml_etree.py:62:19:62:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | lxml_etree.py:74:34:74:44 | ControlFlowNode for xml_content | This | lxml_etree.py:71:19:71:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup, XXE. | lxml_etree.py:81:34:81:44 | ControlFlowNode for xml_content | This | lxml_etree.py:78:19:78:25 | ControlFlowNode for request | user-provided value | -| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: DTD retrieval. 
| lxml_etree.py:90:34:90:44 | ControlFlowNode for xml_content | This | lxml_etree.py:87:19:87:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | xml_dom.py:13:19:13:25 | ControlFlowNode for request | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:15:34:15:54 | ControlFlowNode for StringIO() | This | xml_dom.py:13:19:13:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | xml_dom.py:19:19:19:25 | ControlFlowNode for request | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:21:40:21:50 | ControlFlowNode for xml_content | This | xml_dom.py:19:19:19:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | xml_dom.py:25:19:25:25 | ControlFlowNode for request | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:27:34:27:54 | ControlFlowNode for StringIO() | This | xml_dom.py:25:19:25:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | xml_dom.py:39:19:39:25 | ControlFlowNode for request | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_dom.py:43:34:43:54 | ControlFlowNode for StringIO() | This | xml_dom.py:39:19:39:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | xml_etree.py:13:19:13:25 | ControlFlowNode for request | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:15:45:15:55 | ControlFlowNode for xml_content | This | xml_etree.py:13:19:13:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | xml_etree.py:19:19:19:25 | ControlFlowNode for request | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:21:49:21:59 | ControlFlowNode for xml_content | This | xml_etree.py:19:19:19:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | xml_etree.py:25:19:25:25 | ControlFlowNode for request | xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_etree.py:27:38:27:48 | ControlFlowNode for xml_content | This | xml_etree.py:25:19:25:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | xml_etree.py:31:19:31:25 | ControlFlowNode for request | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:33:40:33:60 | ControlFlowNode for StringIO() | This | xml_etree.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | xml_etree.py:39:19:39:25 | ControlFlowNode for request | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_etree.py:42:45:42:55 | ControlFlowNode for xml_content | This | xml_etree.py:39:19:39:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value | -| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. 
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax.py:31:19:31:25 | ControlFlowNode for request | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax.py:31:19:31:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax.py:42:19:42:25 | ControlFlowNode for request | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax.py:42:19:42:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax.py:63:19:63:25 | ControlFlowNode for request | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax.py:63:19:63:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | xml_sax.py:75:19:75:25 | ControlFlowNode for request | xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. 
| xml_sax.py:79:33:79:53 | ControlFlowNode for StringIO() | This | xml_sax.py:75:19:75:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax.py:86:19:86:25 | ControlFlowNode for request | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax.py:86:19:86:25 | ControlFlowNode for request | user-provided value | -| xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax.py:98:19:98:25 | ControlFlowNode for request | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax.py:98:19:98:25 | ControlFlowNode for request | user-provided value | -| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. 
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value | +| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | test.py:9:34:9:44 | ControlFlowNode for xml_content | This | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | test.py:30:34:30:44 | ControlFlowNode for xml_content | This | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/test.py b/python/ql/test/experimental/query-tests/Security/CWE-611/test.py new file mode 100644 index 000000000000..d9181c4cf346 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/test.py @@ -0,0 +1,30 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.route("/vuln-handler") +def vuln_handler(): + xml_content = request.args['xml_content'] + return lxml.etree.fromstring(xml_content).text + +@app.route("/safe-handler") +def safe_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser(resolve_entities=False) + return lxml.etree.fromstring(xml_content, parser=parser).text + +@app.route("/super-vuln-handler") +def super_vuln_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser( + # allows XXE + resolve_entities=True, + # allows remote XXE + no_network=False, + # together with `no_network=False`, allows DTD-retrival + load_dtd=True, + # allows DoS attacks + 
huge_tree=True, + ) + return lxml.etree.fromstring(xml_content, parser=parser).text From 33ebcdf43715f10d1deafc493e9fd568f7b78bea Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:26:06 +0100 Subject: [PATCH 29/44] Python: Support feed method of lxml/xml.etree Parsers --- .../semmle/python/frameworks/Xml.qll | 50 +++++++++++++++++++ .../frameworks/XML/lxml_etree.py | 6 +++ .../library-tests/frameworks/XML/xml_etree.py | 6 +++ 3 files changed, 62 insertions(+) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index b0e7592c3936..caf5a3b434ab 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -79,6 +79,28 @@ private module Xml { } } + /** + * A call to the `feed` method of an `xml.etree` parser. + */ + private class XMLEtreeParserFeedCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLEtreeParserFeedCall() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLParser") + .getReturn() + .getMember("feed") + .getACall() + } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } + } + /** * A call to the `setFeature` method on a XML sax parser. * @@ -322,6 +344,7 @@ private module Xml { } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // TODO: This should be done with type-tracking exists(XML::XMLParser xmlParser | xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) ) @@ -330,6 +353,33 @@ private module Xml { } } + /** + * A call to the `feed` method of an `lxml.etree` parser. 
+ */ + private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + LXMLEtreeParserFeedCall() { + exists(API::Node parserInstance | + parserInstance = + API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getReturn() + or + parserInstance = + API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getReturn() + | + this = parserInstance.getMember("feed").getACall() + ) + } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // TODO: This should be done with type-tracking + exists(XML::XMLParser xmlParser | + xmlParser = this.getObject().getALocalSource() and + xmlParser.vulnerable(kind) + ) + } + } + /** * Gets a call to `xmltodict.parse`. * diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 3e6e6fb08e7f..db8b667e70a5 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -26,6 +26,12 @@ parser = lxml.etree.get_default_parser() lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' +# manual use of feed method +parser = lxml.etree.XMLParser() +parser.feed(x) # $ input=x vuln='XXE' +parser.feed(data=x) # $ input=x vuln='XXE' +parser.close() + # XXE-safe parser = lxml.etree.XMLParser(resolve_entities=False) lxml.etree.fromstring(x, parser=parser) # $ input=x diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index 23ac3784cbc3..3220d95c624c 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -27,6 +27,12 @@ parser = 
xml.etree.ElementTree.XMLParser() xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +# manual use of feed method +parser = xml.etree.ElementTree.XMLParser() +parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.close() + # note: it's technically possible to use the thing wrapper func `fromstring` with an # `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it # seems very unlikely that anyone would do this, so we have intentionally not added any From 46238d5ea049e5b51f99f4b66366957852a649c8 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:27:52 +0100 Subject: [PATCH 30/44] Python: Add test for XMLPullParser But handling this in a nice way will require some restructuring --- .../experimental/library-tests/frameworks/XML/xml_etree.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index 3220d95c624c..ee452c11853d 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -33,6 +33,12 @@ parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' parser.close() +# manual use of feed method on XMLPullParser +parser = xml.etree.ElementTree.XMLPullParser() +parser.feed(x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(data=x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.close() + # note: it's technically possible to use the thing wrapper func `fromstring` with an # `lxml` parser, and thereby change what vulnerabilities you are exposed to.. 
but it # seems very unlikely that anyone would do this, so we have intentionally not added any From de0e67f327de078af5c6574445e82f7574f52984 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:31:15 +0100 Subject: [PATCH 31/44] Python: Restructure overall XML modeling --- .../semmle/python/frameworks/Xml.qll | 82 ++++++++++--------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index caf5a3b434ab..55f45df99ca7 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -8,7 +8,7 @@ private import semmle.python.dataflow.new.DataFlow private import experimental.semmle.python.Concepts private import semmle.python.ApiGraphs -private module Xml { +private module XmlEtree { /** * Gets a call to `xml.etree.ElementTree.XMLParser`. */ @@ -100,7 +100,9 @@ private module Xml { kind.isBillionLaughs() or kind.isQuadraticBlowup() } } +} +private module SaxBasedParsing { /** * A call to the `setFeature` method on a XML sax parser. * @@ -251,6 +253,45 @@ private module Xml { } } + /** + * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. + * + * Both of these modules are based on SAX parsers. 
+ */ + private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLDomParsing() { + this = + API::moduleImport("xml") + .getMember("dom") + .getMember(["minidom", "pulldom"]) + .getMember(["parse", "parseString"]) + .getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // minidom.parse + this.getArgByName("file"), + // pulldom.parse + this.getArgByName("stream_or_string"), + ] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + or + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + } + } +} + +private module Lxml { /** * A call to `lxml.etree.get_default_parser`. * @@ -379,7 +420,9 @@ private module Xml { ) } } +} +private module Xmltodict { /** * Gets a call to `xmltodict.parse`. * @@ -405,41 +448,4 @@ private module Xml { this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } } - - /** - * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. - * - * Both of these modules are based on SAX parsers. 
- */ - private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLDomParsing() { - this = - API::moduleImport("xml") - .getMember("dom") - .getMember(["minidom", "pulldom"]) - .getMember(["parse", "parseString"]) - .getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // parseString - this.getArgByName("string"), - // minidom.parse - this.getArgByName("file"), - // pulldom.parse - this.getArgByName("stream_or_string"), - ] - } - - DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and - (kind.isXxe() or kind.isDtdRetrieval()) - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - } - } } From a033b71eaf16dce055696ca7f1485c7f079ad2ed Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:34:46 +0100 Subject: [PATCH 32/44] Python: Align QLdocs of XML modeling --- .../semmle/python/frameworks/Xml.qll | 72 +++++-------------- 1 file changed, 18 insertions(+), 54 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 55f45df99ca7..6c3b86f426e3 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -10,7 +10,7 @@ private import semmle.python.ApiGraphs private module XmlEtree { /** - * Gets a call to `xml.etree.ElementTree.XMLParser`. + * A call to `xml.etree.ElementTree.XMLParser`. 
*/ private class XMLEtreeParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { XMLEtreeParser() { @@ -30,22 +30,13 @@ private module XmlEtree { } /** - * Gets a call to: - * * `xml.etree.ElementTree.fromstring` - * * `xml.etree.ElementTree.fromstringlist` - * * `xml.etree.ElementTree.XML` - * * `xml.etree.ElementTree.parse` - * - * Given the following example: - * - * ```py - * parser = lxml.etree.XMLParser() - * xml.etree.ElementTree.fromstring(xml_content, parser=parser).text - * ``` - * - * * `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)`. - * * `getAnInput()`'s result would be `xml_content`. - * * `vulnerable(kind)`'s `kind` would be `XXE`. + * A call to either of: + * - `xml.etree.ElementTree.fromstring` + * - `xml.etree.ElementTree.fromstringlist` + * - `xml.etree.ElementTree.XML` + * - `xml.etree.ElementTree.XMLID` + * - `xml.etree.ElementTree.parse` + * - `xml.etree.ElementTree.iterparse` */ private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLEtreeParsing() { @@ -186,16 +177,7 @@ private module SaxBasedParsing { } /** - * A XML parsing call with a sax parser. - * - * ```py - * BadHandler = MainHandler() - * parser = xml.sax.make_parser() - * parser.setContentHandler(BadHandler) - * parser.setFeature(xml.sax.handler.feature_external_ges, False) - * parser.parse(StringIO(xml_content)) - * parsed_xml = BadHandler._result - * ``` + * A call to the `parse` method on a SAX XML parser. 
*/ private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { XMLSaxInstanceParsing() { @@ -346,22 +328,14 @@ private module Lxml { } /** - * Gets a call to: - * * `lxml.etree.fromstring` - * * `xml.etree.fromstringlist` - * * `xml.etree.XML` - * * `xml.etree.parse` - * - * Given the following example: + * A call to either of: + * - `lxml.etree.fromstring` + * - `lxml.etree.fromstringlist` + * - `lxml.etree.XML` + * - `lxml.etree.parse` + * - `lxml.etree.parseid` * - * ```py - * parser = lxml.etree.XMLParser() - * lxml.etree.fromstring(xml_content, parser=parser).text - * ``` - * - * * `this` would be `lxml.etree.fromstring(xml_content, parser=parser)`. - * * `getAnInput()`'s result would be `xml_content`. - * * `vulnerable(kind)`'s `kind` would be `XXE`. + * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring */ private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { LXMLParsing() { @@ -395,7 +369,7 @@ private module Lxml { } /** - * A call to the `feed` method of an `lxml.etree` parser. + * A call to the `feed` method of an `lxml` parser. */ private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { LXMLEtreeParserFeedCall() { @@ -424,17 +398,7 @@ private module Lxml { private module Xmltodict { /** - * Gets a call to `xmltodict.parse`. - * - * Given the following example: - * - * ```py - * xmltodict.parse(xml_content, disable_entities=False) - * ``` - * - * * `this` would be `xmltodict.parse(xml_content, disable_entities=False)`. - * * `getAnInput()`'s result would be `xml_content`. - * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`. + * A call to `xmltodict.parse`. 
*/ private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } From c0a2c25f5a712967ea5d067907e7c5be7b71a144 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 21:38:52 +0100 Subject: [PATCH 33/44] Python: Restructure modeling of `xml.etree` parsers --- .../semmle/python/frameworks/Xml.qll | 96 +++++++++++-------- .../library-tests/frameworks/XML/xml_etree.py | 4 +- 2 files changed, 59 insertions(+), 41 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 6c3b86f426e3..e6a52fc19be6 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -10,22 +10,65 @@ private import semmle.python.ApiGraphs private module XmlEtree { /** - * A call to `xml.etree.ElementTree.XMLParser`. + * Provides models for `xml.etree` parsers + * + * See + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser */ - private class XMLEtreeParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { - XMLEtreeParser() { - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("XMLParser") - .getACall() + module XMLParser { + /** + * A source of instances of `xml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers. 
+ */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `xml.etree` parsers. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLParser") + .getACall() + or + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLPullParser") + .getACall() + } } - override DataFlow::Node getAnInput() { none() } + /** Gets a reference to an `xml.etree` parser instance. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - kind.isBillionLaughs() or kind.isQuadraticBlowup() + /** Gets a reference to an `xml.etree` parser instance. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * A call to the `feed` method of an `xml.etree` parser. + */ + private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLEtreeParserFeedCall() { this.calls(instance(), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } } } @@ -61,33 +104,8 @@ private module XmlEtree { } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - not exists(this.getArgByName("parser")) and - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - or - exists(XML::XMLParser xmlParser | - xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) - ) - } - } - - /** - * A call to the `feed` method of an `xml.etree` parser. 
- */ - private class XMLEtreeParserFeedCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLEtreeParserFeedCall() { - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("XMLParser") - .getReturn() - .getMember("feed") - .getACall() - } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // note: it does not matter what `xml.etree` parser you are using, you cannot + // change the security features anyway :| kind.isBillionLaughs() or kind.isQuadraticBlowup() } } diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index ee452c11853d..df126e458e2d 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -35,8 +35,8 @@ # manual use of feed method on XMLPullParser parser = xml.etree.ElementTree.XMLPullParser() -parser.feed(x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.feed(data=x) # $ MISSING: input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' parser.close() # note: it's technically possible to use the thing wrapper func `fromstring` with an From c0a6f9f3fdcd7d3b52d4da7fb5657ad839686322 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:00:55 +0100 Subject: [PATCH 34/44] Python: Restructure lxml modeling and handle parser being passed as positional argument --- .../semmle/python/frameworks/Xml.qll | 164 ++++++++++-------- .../frameworks/XML/lxml_etree.py | 1 + 2 files changed, 94 insertions(+), 71 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll 
b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index e6a52fc19be6..4af068cad317 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -293,55 +293,104 @@ private module SaxBasedParsing { private module Lxml { /** - * A call to `lxml.etree.get_default_parser`. + * Provides models for `lxml.etree` parsers * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser */ - private class LXMLDefaultParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { - LXMLDefaultParser() { - this = API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() + module XMLParser { + /** + * A source of instances of `lxml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { + /** Holds if this instance is vulnerable to `kind`. */ + abstract predicate vulnerable(XML::XMLVulnerabilityKind kind); } - override DataFlow::Node getAnInput() { none() } + /** + * A call to `lxml.etree.XMLParser`. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + */ + private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode { + LXMLParser() { + this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() + } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - // as highlighted by - // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - // by default XXE is allow. 
so as long as the default parser has not been - // overridden, the result is also vuln to XXE. - kind.isXxe() - // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`. + // NOTE: it's not possible to change settings of a parser after constructing it + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + kind.isXxe() and + ( + // resolve_entities has default True + not exists(this.getArgByName("resolve_entities")) + or + this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) + ) + or + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + or + kind.isDtdRetrieval() and + this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and + this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) + } } - } - /** - * A call to `lxml.etree.XMLParser`. - * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - */ - private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range { - LXMLParser() { - this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() - } + /** + * A call to `lxml.etree.get_default_parser`. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser + */ + private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode { + LXMLDefaultParser() { + this = + API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() + } - override DataFlow::Node getAnInput() { none() } + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + // as highlighted by + // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + // by default XXE is allow. so as long as the default parser has not been + // overridden, the result is also vuln to XXE. 
+ kind.isXxe() + // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`. + } + } - // NOTE: it's not possible to change settings of a parser after constructing it - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - kind.isXxe() and - ( - // resolve_entities has default True - not exists(this.getArgByName("resolve_entities")) - or - this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) - ) - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) { + t.start() and + result = origin or - kind.isDtdRetrieval() and - this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and - this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) + exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t)) + } + + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + DataFlow::Node instance(InstanceSource origin) { + instance(DataFlow::TypeTracker::end(), origin).flowsTo(result) + } + + /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */ + DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) { + exists(InstanceSource origin | result = instance(origin) and origin.vulnerable(kind)) + } + + /** + * A call to the `feed` method of an `lxml` parser. 
+ */ + private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + LXMLParserFeedCall() { this.calls(instance(_), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + this.calls(instanceVulnerableTo(kind), "feed") + } } } @@ -376,40 +425,13 @@ private module Lxml { ] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - // TODO: This should be done with type-tracking - exists(XML::XMLParser xmlParser | - xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind) - ) - or - kind.isXxe() and not exists(this.getArgByName("parser")) - } - } - - /** - * A call to the `feed` method of an `lxml` parser. - */ - private class LXMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - LXMLEtreeParserFeedCall() { - exists(API::Node parserInstance | - parserInstance = - API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getReturn() - or - parserInstance = - API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getReturn() - | - this = parserInstance.getMember("feed").getACall() - ) - } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } override predicate vulnerable(XML::XMLVulnerabilityKind kind) { - // TODO: This should be done with type-tracking - exists(XML::XMLParser xmlParser | - xmlParser = this.getObject().getALocalSource() and - xmlParser.vulnerable(kind) - ) + this.getParserArg() = XMLParser::instanceVulnerableTo(kind) + or + kind.isXxe() and + not exists(this.getParserArg()) } } } diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 
db8b667e70a5..47ade6431221 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -34,6 +34,7 @@ # XXE-safe parser = lxml.etree.XMLParser(resolve_entities=False) +lxml.etree.fromstring(x, parser) # $ input=x lxml.etree.fromstring(x, parser=parser) # $ input=x # XXE-vuln From df8e0fce68c1ea11bacaf789caebfbd7e5391376 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:02:48 +0100 Subject: [PATCH 35/44] Python: Minor fixup of qldoc --- .../experimental/semmle/python/frameworks/Xml.qll | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 4af068cad317..a3f79967b67c 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -153,7 +153,11 @@ private module SaxBasedParsing { result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg) } - /** Gets a reference to a XML sax parser that has `feature_external_ges` turned on */ + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { t.start() and exists(SaxParserSetFeatureCall call | @@ -189,7 +193,11 @@ private module SaxBasedParsing { ) } - /** Gets a reference to a XML sax parser that has been made unsafe for `kind`. 
*/ + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() { result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) } From 837daaae3b5f2fba2e405f8cf7900a9c51999769 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:04:48 +0100 Subject: [PATCH 36/44] Python: Remove XMLParser concept --- .../experimental/semmle/python/Concepts.qll | 39 ------------------- .../XmlEntityInjectionCustomizations.qll | 13 ------- 2 files changed, 52 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 22616c0a5d2b..29ce05501ca9 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -76,45 +76,6 @@ module XML { abstract predicate vulnerable(XMLVulnerabilityKind kind); } } - - /** - * A data-flow node that constructs an XML parser. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParser` instead. - */ - class XMLParser extends DataFlow::Node instanceof XMLParser::Range { - /** - * Gets the argument containing the content to parse. - */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - - /** - * Holds if the parser is vulnerable to `kind`. - */ - predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } - } - - /** Provides classes for modeling XML parsers. */ - module XMLParser { - /** - * A data-flow node that constructs an XML parser. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParser` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets the argument containing the content to parse. 
- */ - abstract DataFlow::Node getAnInput(); - - /** - * Holds if the parser is vulnerable to `kind`. - */ - abstract predicate vulnerable(XMLVulnerabilityKind kind); - } - } } /** Provides classes for modeling LDAP query execution-related APIs. */ diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll index 7de0c0c4b9c2..44c5da0bcea1 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -61,19 +61,6 @@ module XmlEntityInjection { override string getVulnerableKind() { xmlParsing.vulnerable(result) } } - /** - * An input to an XML parser, considered as a flow sink. - * - * See `XML::XMLParser` - */ - class XMLParserInputAsSink extends Sink { - XML::XMLParser xmlParser; - - XMLParserInputAsSink() { this = xmlParser.getAnInput() } - - override string getVulnerableKind() { xmlParser.vulnerable(result) } - } - /** * A source of remote user input, considered as a flow source. */ From 0d69dc854c47df55eddb11d500b8cbe9b04f1d75 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:06:26 +0100 Subject: [PATCH 37/44] Python: Minor qldoc improvement --- python/ql/src/experimental/semmle/python/Concepts.qll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 29ce05501ca9..e8837e233ebc 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -51,7 +51,7 @@ module XML { DataFlow::Node getAnInput() { result = super.getAnInput() } /** - * Holds if the parsing method or the parser holding it is vulnerable to `kind`. 
+ * Holds if this XML parsing is vulnerable to `kind`. */ predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } } @@ -71,7 +71,7 @@ module XML { abstract DataFlow::Node getAnInput(); /** - * Holds if the parsing method or the parser holding it is vulnerable to `kind`. + * Holds if this XML parsing is vulnerable to `kind`. */ abstract predicate vulnerable(XMLVulnerabilityKind kind); } From 3f6c55e8aeb3c930d730bb719b778811ffa6dbf1 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 3 Mar 2022 22:09:31 +0100 Subject: [PATCH 38/44] Python: Rename `vulnerable` predicate => `vulnerableTo` --- .../experimental/semmle/python/Concepts.qll | 4 ++-- .../semmle/python/frameworks/Xml.qll | 24 +++++++++---------- .../XmlEntityInjectionCustomizations.qll | 2 +- .../XML/ExperimentalXmlConceptsTests.ql | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index e8837e233ebc..7ebe90969221 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -53,7 +53,7 @@ module XML { /** * Holds if this XML parsing is vulnerable to `kind`. */ - predicate vulnerable(XMLVulnerabilityKind kind) { super.vulnerable(kind) } + predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) } } /** Provides classes for modeling XML parsing APIs. */ @@ -73,7 +73,7 @@ module XML { /** * Holds if this XML parsing is vulnerable to `kind`. 
*/ - abstract predicate vulnerable(XMLVulnerabilityKind kind); + abstract predicate vulnerableTo(XMLVulnerabilityKind kind); } } } diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index a3f79967b67c..1d34e017f031 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -66,7 +66,7 @@ private module XmlEtree { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { kind.isBillionLaughs() or kind.isQuadraticBlowup() } } @@ -103,7 +103,7 @@ private module XmlEtree { ] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { // note: it does not matter what `xml.etree` parser you are using, you cannot // change the security features anyway :| kind.isBillionLaughs() or kind.isQuadraticBlowup() @@ -218,7 +218,7 @@ private module SaxBasedParsing { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { // always vuln to these (kind.isBillionLaughs() or kind.isQuadraticBlowup()) or @@ -251,7 +251,7 @@ private module SaxBasedParsing { ] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { // always vuln to these (kind.isBillionLaughs() or kind.isQuadraticBlowup()) or @@ -290,7 +290,7 @@ private module SaxBasedParsing { DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate 
vulnerableTo(XML::XMLVulnerabilityKind kind) { this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and (kind.isXxe() or kind.isDtdRetrieval()) or @@ -317,7 +317,7 @@ private module Lxml { */ abstract class InstanceSource extends DataFlow::LocalSourceNode { /** Holds if this instance is vulnerable to `kind`. */ - abstract predicate vulnerable(XML::XMLVulnerabilityKind kind); + abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind); } /** @@ -331,7 +331,7 @@ private module Lxml { } // NOTE: it's not possible to change settings of a parser after constructing it - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { kind.isXxe() and ( // resolve_entities has default True @@ -360,7 +360,7 @@ private module Lxml { API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { // as highlighted by // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser // by default XXE is allow. so as long as the default parser has not been @@ -385,7 +385,7 @@ private module Lxml { /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. 
*/ DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) { - exists(InstanceSource origin | result = instance(origin) and origin.vulnerable(kind)) + exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind)) } /** @@ -396,7 +396,7 @@ private module Lxml { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { this.calls(instanceVulnerableTo(kind), "feed") } } @@ -435,7 +435,7 @@ private module Lxml { DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { this.getParserArg() = XMLParser::instanceVulnerableTo(kind) or kind.isXxe() and @@ -455,7 +455,7 @@ private module Xmltodict { result in [this.getArg(0), this.getArgByName("xml_input")] } - override predicate vulnerable(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll index 44c5da0bcea1..745658bbce7b 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -58,7 +58,7 @@ module XmlEntityInjection { XMLParsingInputAsSink() { this = xmlParsing.getAnInput() } - override string getVulnerableKind() { xmlParsing.vulnerable(result) } + override string getVulnerableKind() { 
xmlParsing.vulnerableTo(result) } } /** diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql index 8ca33765d64f..81bc391d0e55 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql +++ b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql @@ -22,7 +22,7 @@ class XmlParsingTest extends InlineExpectationsTest { ) or exists(XML::XMLVulnerabilityKind kind | - parsing.vulnerable(kind) and + parsing.vulnerableTo(kind) and location = parsing.getLocation() and element = parsing.toString() and value = "'" + kind + "'" and From 683c2fa8254ebd56ec04a8e0fadb7bdb129c29e7 Mon Sep 17 00:00:00 2001 From: Jorge <46056498+jorgectf@users.noreply.github.com> Date: Fri, 4 Mar 2022 01:02:56 +0100 Subject: [PATCH 39/44] Apply suggestions from code review --- python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 1d34e017f031..86c17374e086 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -154,7 +154,7 @@ private module SaxBasedParsing { } /** - * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. * * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges */ @@ -194,7 +194,7 @@ private module SaxBasedParsing { } /** - * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.class + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. 
* * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges */ @@ -301,7 +301,7 @@ private module SaxBasedParsing { private module Lxml { /** - * Provides models for `lxml.etree` parsers + * Provides models for `lxml.etree` parsers. * * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser */ From 3cd165d5b757be7651f6f9ade20bca773b27e582 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 10:15:50 +0100 Subject: [PATCH 40/44] Python: Apply suggestions from code review Co-authored-by: Jorge <46056498+jorgectf@users.noreply.github.com> --- .../src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql | 6 ++---- python/ql/src/experimental/semmle/python/Concepts.qll | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql index 0e3deebf6016..4177daf29c10 100644 --- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -1,17 +1,15 @@ /** * @name SimpleXMLRPCServer DoS vulnerability * @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input - * @kind path-problem + * @kind problem * @problem.severity warning * @precision high - * @id py/simple-xml-rpc-server + * @id py/simple-xml-rpc-server-dos * @tags security * external/cwe/cwe-776 */ private import python -private import semmle.python.dataflow.new.DataFlow -private import semmle.python.Concepts private import experimental.semmle.python.Concepts private import semmle.python.ApiGraphs diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 7ebe90969221..491267d057f2 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -22,7 
+22,7 @@ module XML { */ class XMLVulnerabilityKind extends string { XMLVulnerabilityKind() { - this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval",] + this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"] } /** Holds for Billion Laughs vulnerability kind. */ From d6cbfec43435204bb5e7350d26d9c636073b6652 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 09:46:49 +0100 Subject: [PATCH 41/44] Python: huge_tree tests were wrong Nice spotted @jorgectf! --- .../frameworks/XML/lxml_etree.py | 4 ++-- .../library-tests/frameworks/XML/poc/PoC.py | 20 ++++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 47ade6431221..7c62ed1ac6ae 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -45,9 +45,9 @@ parser = lxml.etree.XMLParser(huge_tree=True) lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE' -# Billion laughs, but not XXE +# Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +lxml.etree.fromstring(x, parser=parser) # $ input=x SPURIOUS: vuln='Billion Laughs' vuln='Quadratic Blowup' # DTD retrival vuln (also XXE) parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py index b38ff9889e9d..adcace1aa0a6 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py @@ -318,11 
+318,21 @@ def test_billion_laughs_manually_enabled(): @expects_timeout def test_quadratic_blowup_manually_enabled(): parser = lxml.etree.XMLParser(huge_tree=True) - try: - _root = lxml.etree.fromstring(quadratic_blowup, parser=parser) - assert False - except lxml.etree.XMLSyntaxError as e: - assert "Detected an entity reference loop" in str(e) + root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + + @staticmethod + def test_billion_laughs_huge_tree_not_enough(): + parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False) + root = lxml.etree.fromstring(billion_laughs, parser=parser) + assert root.tag == "lolz" + assert root.text == None + + @staticmethod + def test_quadratic_blowup_huge_tree_not_enough(): + parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False) + root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + assert root.tag == "foo" + assert root.text == None @staticmethod def test_ok_xml(): From f0131afc5449459f1562862c557ed537b0ab3a4c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 09:49:00 +0100 Subject: [PATCH 42/44] Python: Fix `huge_tree` modeling --- python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 3 ++- .../experimental/library-tests/frameworks/XML/lxml_etree.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 86c17374e086..533b97436512 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -341,7 +341,8 @@ private module Lxml { ) or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and + not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t) or 
kind.isDtdRetrieval() and this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 7c62ed1ac6ae..22930a58af37 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -47,7 +47,7 @@ # Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x SPURIOUS: vuln='Billion Laughs' vuln='Quadratic Blowup' +lxml.etree.fromstring(x, parser=parser) # $ input=x # DTD retrival vuln (also XXE) parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) From 1a9620a87a4aa9ae406774681431bb2e3274cc88 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 10:01:02 +0100 Subject: [PATCH 43/44] Python: Add conditional assignment check for sax parser --- .../library-tests/frameworks/XML/xml_sax.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py index 89bbec3f1f57..158e62ffae6b 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -45,3 +45,20 @@ def func(cond): parser.setFeature(xml.sax.handler.feature_external_ges, True) parser.setFeature(xml.sax.handler.feature_external_ges, False) parser.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' + +def check_conditional_assignment(cond): + parser = xml.sax.make_parser() + if cond: + parser.setFeature(xml.sax.handler.feature_external_ges, True) + else: + parser.setFeature(xml.sax.handler.feature_external_ges, False) + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + +def check_conditional_assignment2(cond): + parser = xml.sax.make_parser() + if cond: + flag_value = True + else: + flag_value = False + parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' From ef045a6789cd4c7cbe04fba0e15b40461ba9ea75 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 4 Mar 2022 10:18:30 +0100 Subject: [PATCH 44/44] Python: Fix typo in set_default_parser --- python/ql/src/experimental/semmle/python/frameworks/Xml.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 533b97436512..18ba6c5a572c 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -367,7 +367,7 @@ private module Lxml { // by default XXE is allow. so as long as the default parser has not been // overridden, the result is also vuln to XXE. kind.isXxe() - // TODO: take into account that you can override the default parser with `lxml.etree.get_default_parser`. + // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`. } }