diff --git a/.gitignore b/.gitignore index 379b8a60..c99dd699 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# OS X +.DS_Store + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -61,4 +64,4 @@ target/ #Ipython Notebook .ipynb_checkpoints *~ -*# \ No newline at end of file +*# diff --git a/example/import_test_project/A.py b/example/import_test_project/A.py index faa0076d..574c08cb 100644 --- a/example/import_test_project/A.py +++ b/example/import_test_project/A.py @@ -1,2 +1,8 @@ def B(s): return s + +def C(s): + return s + "see" + +def D(s): + return s + "dee" diff --git a/example/import_test_project/all.py b/example/import_test_project/all.py new file mode 100644 index 00000000..e346e801 --- /dev/null +++ b/example/import_test_project/all.py @@ -0,0 +1,6 @@ +from all_folder.has_all import * + + +LemonGrass() +# MangoYuzu is not defined because it is not in __all__ +MangoYuzu() diff --git a/example/import_test_project/__init__.py b/example/import_test_project/all_folder/__init__.py similarity index 100% rename from example/import_test_project/__init__.py rename to example/import_test_project/all_folder/__init__.py diff --git a/example/import_test_project/all_folder/has_all.py b/example/import_test_project/all_folder/has_all.py new file mode 100644 index 00000000..c809fec4 --- /dev/null +++ b/example/import_test_project/all_folder/has_all.py @@ -0,0 +1,9 @@ +__all__ = [ + 'LemonGrass' +] + +def LemonGrass(): + print ('LemonGrass') + +def MangoYuzu(): + print ('MangoYuzu') diff --git a/example/import_test_project/all_folder/no_all.py b/example/import_test_project/all_folder/no_all.py new file mode 100644 index 00000000..8c4db125 --- /dev/null +++ b/example/import_test_project/all_folder/no_all.py @@ -0,0 +1,5 @@ +def _LemonGrass(): + print ('LemonGrass') + +def MangoYuzu(): + print ('MangoYuzu') diff --git a/example/import_test_project/foo/bar.py b/example/import_test_project/foo/bar.py new file mode 100644 index 00000000..f648405f --- 
/dev/null +++ b/example/import_test_project/foo/bar.py @@ -0,0 +1,2 @@ +def H(s): + return s + "end" diff --git a/example/import_test_project/from_directory.py b/example/import_test_project/from_directory.py new file mode 100644 index 00000000..35bc0a75 --- /dev/null +++ b/example/import_test_project/from_directory.py @@ -0,0 +1,3 @@ +from foo import bar + +bar.H('hey') diff --git a/example/import_test_project/from_dot.py b/example/import_test_project/from_dot.py new file mode 100644 index 00000000..1c68bd49 --- /dev/null +++ b/example/import_test_project/from_dot.py @@ -0,0 +1,4 @@ +from . import A + + +c = A.B('sss') diff --git a/example/import_test_project/main.py b/example/import_test_project/import.py similarity index 100% rename from example/import_test_project/main.py rename to example/import_test_project/import.py diff --git a/example/import_test_project/import_as.py b/example/import_test_project/import_as.py new file mode 100644 index 00000000..37045505 --- /dev/null +++ b/example/import_test_project/import_as.py @@ -0,0 +1,4 @@ +from A import B +import A as foo +b = B('str') +c = foo.B('sss') diff --git a/example/import_test_project/init.py b/example/import_test_project/init.py new file mode 100644 index 00000000..bf2914a3 --- /dev/null +++ b/example/import_test_project/init.py @@ -0,0 +1,4 @@ +import init_file_folder + + +init_file_folder.Eataly() diff --git a/example/import_test_project/init_file_folder/__init__.py b/example/import_test_project/init_file_folder/__init__.py new file mode 100755 index 00000000..f9b27ddb --- /dev/null +++ b/example/import_test_project/init_file_folder/__init__.py @@ -0,0 +1 @@ +from .nested_folder import StarbucksVisitor as Eataly diff --git a/example/import_test_project/init_file_folder/nested_folder/__init__.py b/example/import_test_project/init_file_folder/nested_folder/__init__.py new file mode 100755 index 00000000..b1378324 --- /dev/null +++ b/example/import_test_project/init_file_folder/nested_folder/__init__.py @@ 
-0,0 +1 @@ +from .starbucks import StarbucksVisitor diff --git a/example/import_test_project/init_file_folder/nested_folder/nested_nested_folder/can_you_see_me.py b/example/import_test_project/init_file_folder/nested_folder/nested_nested_folder/can_you_see_me.py new file mode 100644 index 00000000..e69de29b diff --git a/example/import_test_project/init_file_folder/nested_folder/starbucks.py b/example/import_test_project/init_file_folder/nested_folder/starbucks.py new file mode 100755 index 00000000..cb428b48 --- /dev/null +++ b/example/import_test_project/init_file_folder/nested_folder/starbucks.py @@ -0,0 +1,3 @@ +class StarbucksVisitor(object): + def __init__(self): + print ("Iced Mocha") diff --git a/example/import_test_project/multiple_files/A.py b/example/import_test_project/multiple_files/A.py new file mode 100644 index 00000000..b9c3d8bf --- /dev/null +++ b/example/import_test_project/multiple_files/A.py @@ -0,0 +1,2 @@ +def cosme(s): + return s + "aaa" diff --git a/example/import_test_project/multiple_files/B.py b/example/import_test_project/multiple_files/B.py new file mode 100644 index 00000000..4ac10ecb --- /dev/null +++ b/example/import_test_project/multiple_files/B.py @@ -0,0 +1,2 @@ +def foo(s): + return s + "bee" diff --git a/example/import_test_project/multiple_files/C.py b/example/import_test_project/multiple_files/C.py new file mode 100644 index 00000000..f4170c9e --- /dev/null +++ b/example/import_test_project/multiple_files/C.py @@ -0,0 +1,2 @@ +def foo(s): + return s + "see" diff --git a/example/import_test_project/multiple_files/D.py b/example/import_test_project/multiple_files/D.py new file mode 100644 index 00000000..64c47ce2 --- /dev/null +++ b/example/import_test_project/multiple_files/D.py @@ -0,0 +1,2 @@ +def foo(s): + return s + "dee" diff --git a/example/import_test_project/multiple_files_with_aliases.py b/example/import_test_project/multiple_files_with_aliases.py new file mode 100644 index 00000000..7b4eeb72 --- /dev/null +++ 
b/example/import_test_project/multiple_files_with_aliases.py @@ -0,0 +1,7 @@ +from .multiple_files import A, B as keens, C as per_se, D as duck_house + + +a = A.cosme('tlayuda') +b = keens.foo('mutton') +c = per_se.foo('tasting') +d = duck_house.foo('peking') diff --git a/example/import_test_project/multiple_functions_with_aliases.py b/example/import_test_project/multiple_functions_with_aliases.py new file mode 100644 index 00000000..070352e9 --- /dev/null +++ b/example/import_test_project/multiple_functions_with_aliases.py @@ -0,0 +1,6 @@ +from .A import B as keens, C, D as duck_house + + +a = keens('mutton') +b = C('tasting') +c = duck_house('peking') diff --git a/example/import_test_project/no_all.py b/example/import_test_project/no_all.py new file mode 100644 index 00000000..2b12159f --- /dev/null +++ b/example/import_test_project/no_all.py @@ -0,0 +1,6 @@ +from all_folder.no_all import * + + +# _LemonGrass is not defined because it starts with _ +_LemonGrass() +MangoYuzu() diff --git a/example/import_test_project/other_dir/from_dot_dot.py b/example/import_test_project/other_dir/from_dot_dot.py new file mode 100644 index 00000000..25c16b51 --- /dev/null +++ b/example/import_test_project/other_dir/from_dot_dot.py @@ -0,0 +1,4 @@ +from .. 
import A + + +c = A.B('sss') diff --git a/example/import_test_project/other_dir/relative_between_folders.py b/example/import_test_project/other_dir/relative_between_folders.py new file mode 100644 index 00000000..73b1a4ec --- /dev/null +++ b/example/import_test_project/other_dir/relative_between_folders.py @@ -0,0 +1,3 @@ +from ..foo.bar import H + +result = H('hey') diff --git a/example/import_test_project/relative_from_directory.py b/example/import_test_project/relative_from_directory.py new file mode 100644 index 00000000..f2bdf764 --- /dev/null +++ b/example/import_test_project/relative_from_directory.py @@ -0,0 +1,5 @@ +# Must be run as module via -m +from .foo import bar + + +bar.H('hey') diff --git a/example/import_test_project/relative_level_1.py b/example/import_test_project/relative_level_1.py new file mode 100644 index 00000000..680f36f2 --- /dev/null +++ b/example/import_test_project/relative_level_1.py @@ -0,0 +1,4 @@ +from .A import B +import A +b = B('str') +c = A.B('sss') diff --git a/example/import_test_project/relative_level_2.py b/example/import_test_project/relative_level_2.py new file mode 100644 index 00000000..e0f231ff --- /dev/null +++ b/example/import_test_project/relative_level_2.py @@ -0,0 +1,4 @@ +from ..A import B +import A +b = B('str') +c = A.B('sss') diff --git a/example/nested_functions_code/nested_function_calls.py b/example/nested_functions_code/nested_function_calls.py new file mode 100644 index 00000000..f03398c0 --- /dev/null +++ b/example/nested_functions_code/nested_function_calls.py @@ -0,0 +1 @@ +abc = print(foo('bar')) diff --git a/example/vulnerable_code/command_injection.py b/example/vulnerable_code/command_injection.py index 90c65804..44c45d92 100644 --- a/example/vulnerable_code/command_injection.py +++ b/example/vulnerable_code/command_injection.py @@ -1,5 +1,5 @@ -from flask import Flask, request, render_template import subprocess +from flask import Flask, render_template, request app = Flask(__name__) @@ -28,7 +28,7 
@@ def clean(): with open('menu.txt','r') as f: menu = f.read() - + return render_template('command_injection.html', menu=menu) if __name__ == '__main__': diff --git a/example/vulnerable_code/inter_command_injection.py b/example/vulnerable_code/inter_command_injection.py new file mode 100644 index 00000000..8cb2516d --- /dev/null +++ b/example/vulnerable_code/inter_command_injection.py @@ -0,0 +1,38 @@ +import subprocess +from flask import Flask, render_template, request + + +app = Flask(__name__) + +@app.route('/') +def index(): + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +def shell_the_arg(arg): + subprocess.call(arg, shell=True) + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + shell_the_arg('echo ' + param + ' >> ' + 'menu.txt') + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +@app.route('/clean') +def clean(): + subprocess.call('echo Menu: > menu.txt', shell=True) + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code/inter_command_injection_2.py b/example/vulnerable_code/inter_command_injection_2.py new file mode 100644 index 00000000..72c8062e --- /dev/null +++ b/example/vulnerable_code/inter_command_injection_2.py @@ -0,0 +1,39 @@ +import subprocess +from flask import Flask, render_template, request + + +app = Flask(__name__) + +@app.route('/') +def index(): + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +def return_the_arg(foo): + return foo + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + command = return_the_arg('echo ' + param + ' >> ' + 'menu.txt') + subprocess.call(command, shell=True) + + with 
open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +@app.route('/clean') +def clean(): + subprocess.call('echo Menu: > menu.txt', shell=True) + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code_across_files/absolute_from_file_command_injection.py b/example/vulnerable_code_across_files/absolute_from_file_command_injection.py new file mode 100644 index 00000000..0f22a1f7 --- /dev/null +++ b/example/vulnerable_code_across_files/absolute_from_file_command_injection.py @@ -0,0 +1,20 @@ +from flask import Flask, render_template, request + +from other_file import shell_the_arg + + +app = Flask(__name__) + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + shell_the_arg('echo ' + param + ' >> ' + 'menu.txt') + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code_across_files/absolute_from_file_command_injection_2.py b/example/vulnerable_code_across_files/absolute_from_file_command_injection_2.py new file mode 100644 index 00000000..916bf096 --- /dev/null +++ b/example/vulnerable_code_across_files/absolute_from_file_command_injection_2.py @@ -0,0 +1,22 @@ +import subprocess +from flask import Flask, render_template, request + +from other_file import return_the_arg + + +app = Flask(__name__) + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + command = return_the_arg('echo ' + param + ' >> ' + 'menu.txt') + subprocess.call(command, shell=True) + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git 
a/example/vulnerable_code_across_files/absolute_from_file_does_not_exist.py b/example/vulnerable_code_across_files/absolute_from_file_does_not_exist.py new file mode 100644 index 00000000..48202718 --- /dev/null +++ b/example/vulnerable_code_across_files/absolute_from_file_does_not_exist.py @@ -0,0 +1,22 @@ +import subprocess +from flask import Flask, render_template, request + +from other_file import does_not_exist + + +app = Flask(__name__) + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + command = does_not_exist('echo ' + param + ' >> ' + 'menu.txt') + subprocess.call(command, shell=True) + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code_across_files/import_file_command_injection.py b/example/vulnerable_code_across_files/import_file_command_injection.py new file mode 100644 index 00000000..c3ea7b76 --- /dev/null +++ b/example/vulnerable_code_across_files/import_file_command_injection.py @@ -0,0 +1,20 @@ +from flask import Flask, render_template, request + +import other_file + + +app = Flask(__name__) + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + other_file.shell_the_arg('echo ' + param + ' >> ' + 'menu.txt') + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code_across_files/import_file_command_injection_2.py b/example/vulnerable_code_across_files/import_file_command_injection_2.py new file mode 100644 index 00000000..e915fa49 --- /dev/null +++ b/example/vulnerable_code_across_files/import_file_command_injection_2.py @@ -0,0 +1,22 @@ +import subprocess +from flask import Flask, render_template, request + +import other_file + + +app = Flask(__name__) + 
+@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + command = other_file.return_the_arg('echo ' + param + ' >> ' + 'menu.txt') + subprocess.call(command, shell=True) + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code_across_files/import_file_does_not_exist.py b/example/vulnerable_code_across_files/import_file_does_not_exist.py new file mode 100644 index 00000000..570cc4c0 --- /dev/null +++ b/example/vulnerable_code_across_files/import_file_does_not_exist.py @@ -0,0 +1,22 @@ +import subprocess +from flask import Flask, render_template, request + +import other_file + + +app = Flask(__name__) + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + command = other_file.does_not_exist('echo ' + param + ' >> ' + 'menu.txt') + subprocess.call(command, shell=True) + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code_across_files/no_false_positive_absolute_from_file_command_injection_3.py b/example/vulnerable_code_across_files/no_false_positive_absolute_from_file_command_injection_3.py new file mode 100644 index 00000000..a0fa7265 --- /dev/null +++ b/example/vulnerable_code_across_files/no_false_positive_absolute_from_file_command_injection_3.py @@ -0,0 +1,22 @@ +import subprocess +from flask import Flask, render_template, request + +from other_file import return_constant_string + + +app = Flask(__name__) + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + command = return_constant_string('echo ' + param + ' >> ' + 'menu.txt') + subprocess.call(command, shell=True) + + with open('menu.txt','r') as f: + menu = f.read() + + return 
render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code_across_files/no_false_positive_import_file_command_injection_3.py b/example/vulnerable_code_across_files/no_false_positive_import_file_command_injection_3.py new file mode 100644 index 00000000..259a71a1 --- /dev/null +++ b/example/vulnerable_code_across_files/no_false_positive_import_file_command_injection_3.py @@ -0,0 +1,22 @@ +import subprocess +from flask import Flask, render_template, request + +import other_file + + +app = Flask(__name__) + +@app.route('/menu', methods=['POST']) +def menu(): + param = request.form['suggestion'] + + command = other_file.return_constant_string('echo ' + param + ' >> ' + 'menu.txt') + subprocess.call(command, shell=True) + + with open('menu.txt','r') as f: + menu = f.read() + + return render_template('command_injection.html', menu=menu) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code_across_files/other_file.py b/example/vulnerable_code_across_files/other_file.py new file mode 100644 index 00000000..f006c61f --- /dev/null +++ b/example/vulnerable_code_across_files/other_file.py @@ -0,0 +1,12 @@ +import subprocess + +def return_constant_string(easy_to_find_in_logs): + no_vuln = "This is not a vuln" + return no_vuln + +def return_the_arg(easy_to_find_in_logs): + hehe = 'bar' + easy_to_find_in_logs + return hehe + +def shell_the_arg(easy_to_find_in_logs): + subprocess.call(easy_to_find_in_logs, shell=True) diff --git a/func_counter.py b/func_counter.py index c2f5f59c..82b4dc55 100644 --- a/func_counter.py +++ b/func_counter.py @@ -2,8 +2,8 @@ to get an estimate og how big the CFG should be""" import ast -from pyt.cfg import get_call_names_as_string, generate_ast -from pyt.project_handler import get_python_modules +from pyt.cfg import generate_ast, get_call_names_as_string +from pyt.project_handler import get_modules function_calls = list() @@ -34,7 
+34,7 @@ def visit_ClassDef(self, node): if __name__ == '__main__': - module_paths = (m[1] for m in get_python_modules('../flaskbb/flaskbb')) + module_paths = (m[1] for m in get_modules('../flaskbb/flaskbb')) for p in module_paths: print(p) t = generate_ast(p) diff --git a/pydocstyle.py b/pydocstyle.py index 618efad2..6a8d9ff3 100644 --- a/pydocstyle.py +++ b/pydocstyle.py @@ -1,7 +1,7 @@ +import os import re import subprocess import sys -import os os.chdir(os.path.join('pyt')) diff --git a/pyt/__main__.py b/pyt/__main__.py index 694a461e..022c85bc 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -16,7 +16,7 @@ from .intraprocedural_cfg import intraprocedural from .lattice import print_lattice from .liveness import LivenessAnalysis -from .project_handler import get_directory_modules, get_python_modules +from .project_handler import get_directory_modules, get_modules from .reaching_definitions import ReachingDefinitionsAnalysis from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from .repo_runner import get_repos @@ -143,7 +143,7 @@ def analyse_repo(github_repo, analysis_type): cfg_list = list() - project_modules = get_python_modules(os.path.dirname(github_repo.path)) + project_modules = get_modules(os.path.dirname(github_repo.path)) intraprocedural(project_modules, cfg_list) initialize_constraint_table(cfg_list) analyse(cfg_list, analysis_type=analysis_type) @@ -192,7 +192,7 @@ def main(): directory = os.path.normpath(args.project_root) else: directory = os.path.dirname(path) - project_modules = get_python_modules(directory) + project_modules = get_modules(directory) local_modules = get_directory_modules(directory) tree = generate_ast(path) diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index e6e57a38..89957a56 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -31,13 +31,12 @@ def __init__(self, label, ast_node, *, line_number, path): label (str): The label of the node, describing its expression. 
line_number(Optional[int]): The line of the expression of the Node. """ - self.ingoing = list() - self.outgoing = list() - self.label = label self.ast_node = ast_node self.line_number = line_number self.path = path + self.ingoing = list() + self.outgoing = list() def connect(self, successor): """Connect this node to its successor node by @@ -56,7 +55,8 @@ def connect_predecessors(self, predecessors): def __str__(self): """Print the label of the node.""" - return ' '.join(('Label: ', self.label)) + return ''.join((' Label: ', self.label)) + def __repr__(self): """Print a representation of the node.""" @@ -90,7 +90,7 @@ class FunctionNode(Node): def __init__(self, ast_node): """Create a function node. - This node is a dummy node representing a function definition + This node is a dummy node representing a function definition. """ super().__init__(self.__class__.__name__, ast_node) @@ -162,7 +162,7 @@ def __init__(self, label, left_hand_side, right_hand_side_variables, ast_node, * Args: label (str): The label of the node, describing the expression it represents. - restore_nodes(list[Node]): List of nodes that where restored in the function call. + restore_nodes(list[Node]): List of nodes that were restored in the function call. right_hand_side_variables(list[str]): A list of variables on the right hand side. line_number(Optional[int]): The line of the expression the Node represents. """ @@ -273,7 +273,7 @@ def get_last_statements(self, cfg_statements): def stmt_star_handler(self, stmts): """Handle stmt* expressions in an AST node. - Links all statements together in a list of statements, accounting for statements with multiple last nodes + Links all statements together in a list of statements, accounting for statements with multiple last nodes. 
""" cfg_statements = list() break_nodes = list() @@ -286,7 +286,7 @@ def stmt_star_handler(self, stmts): elif isinstance(node, BreakNode): break_nodes.append(node) - if self.node_to_connect(node): + if self.node_to_connect(node) and node: cfg_statements.append(node) self.connect_nodes(cfg_statements) @@ -495,7 +495,6 @@ def visit_Assign(self, node): return self.assign_multi_target(node, rhs_visitor.result) else: if isinstance(node.value, ast.Call): # x = call() - label = LabelVisitor() label.visit(node.targets[0]) return self.assignment_call_node(label.result, node) @@ -602,11 +601,12 @@ def visit_Expr(self, node): def add_builtin(self, node): label = LabelVisitor() label.visit(node) - builtin_call = Node(label.result, node, line_number=node.lineno, path=self.filenames[-1]) + builtin_call = Node(label.result, node, line_number=node.lineno, path=self.filenames[-1]) if not self.undecided: self.nodes.append(builtin_call) self.undecided = False + return builtin_call def visit_Name(self, node): diff --git a/pyt/interprocedural_cfg.py b/pyt/interprocedural_cfg.py index 19b20911..6427d281 100644 --- a/pyt/interprocedural_cfg.py +++ b/pyt/interprocedural_cfg.py @@ -1,4 +1,5 @@ import ast +import os.path from collections import namedtuple from .ast_helper import Arguments, generate_ast, get_call_names_as_string @@ -77,7 +78,7 @@ def init_function_cfg(self, node, module_definitions): self.function_names.append(node.name) self.function_return_stack.append(node.name) - entry_node = self.append_node(EntryOrExitNode("Entry module")) + entry_node = self.append_node(EntryOrExitNode("Entry function")) module_statements = self.stmt_star_handler(node.body) @@ -86,7 +87,7 @@ def init_function_cfg(self, node, module_definitions): if CALL_IDENTIFIER not in first_node.label: entry_node.connect(first_node) - exit_node = self.append_node(EntryOrExitNode("Exit module")) + exit_node = self.append_node(EntryOrExitNode("Exit function")) last_nodes = module_statements.last_statements 
exit_node.connect_predecessors(last_nodes) @@ -132,7 +133,7 @@ def add_to_definitions(self, node): local_definitions.module_name, self.filenames[-1]) parent_definition.node = node - parent_definitions.append(parent_definition) + parent_definitions.append_if_local_or_in_imports(parent_definition) local_qualified_name = '.'.join(local_definitions.classes + [node.name]) @@ -141,7 +142,7 @@ def add_to_definitions(self, node): None, self.filenames[-1]) local_definition.node = node - local_definitions.append(local_definition) + local_definitions.append_if_local_or_in_imports(local_definition) self.function_names.append(node.name) @@ -218,9 +219,12 @@ def save_actual_parameters_in_temp(self, args, arguments, line_number): label_visitor = LabelVisitor() label_visitor.visit(parameter) + rhs_visitor = RHSVisitor() + rhs_visitor.visit(parameter) + n = RestoreNode(temp_name + ' = ' + label_visitor.result, temp_name, - [label_visitor.result], + rhs_visitor.result, line_number=line_number, path=self.filenames[-1]) @@ -326,7 +330,7 @@ def add_function(self, call_node, definition): def get_function_nodes(self, definition): length = len(self.nodes) previous_node = self.nodes[-1] - entry_node = self.append_node(EntryOrExitNode("Entry " + + entry_node = self.append_node(EntryOrExitNode("Function Entry " + definition.name)) previous_node.connect(entry_node) function_body_connect_statements = self.stmt_star_handler(definition.node.body) @@ -345,7 +349,15 @@ def visit_Call(self, node): self.function_return_stack.append(_id) local_definitions = self.module_definitions_stack[-1] - definition = local_definitions.get_definition(_id) + + real_id = _id + for k in local_definitions.import_alias_mapping.keys(): + if _id.startswith(k): + real_id = _id.replace(k, local_definitions.import_alias_mapping[k]) + definition = local_definitions.get_definition(real_id) + break + if real_id == _id: + definition = local_definitions.get_definition(_id) if definition: if isinstance(definition.node, 
ast.ClassDef): @@ -364,7 +376,7 @@ def add_class(self, call_node, def_node): previous_node = self.nodes[-1] - entry_node = self.append_node(EntryOrExitNode("Entry " + def_node.name)) + entry_node = self.append_node(EntryOrExitNode("Class Entry " + def_node.name)) previous_node.connect(entry_node) @@ -379,38 +391,79 @@ def add_class(self, call_node, def_node): line_number=call_node.lineno, path=self.filenames[-1]) - def add_module(self, module, module_name, local_names): + def retrieve_import_alias_mapping(self, names_list): + """Creates a dictionary mapping aliases to their respective name. + import_alias_names is used in module_definitions.py and visit_Call""" + import_alias_names = {} + + for alias in names_list: + if alias.asname: + import_alias_names[alias.asname] = alias.name + return import_alias_names + + def add_file_module(self, module, module_name, local_names, import_alias_mapping): module_path = module[1] self.filenames.append(module_path) self.local_modules = get_directory_modules(module_path) tree = generate_ast(module_path) parent_definitions = self.module_definitions_stack[-1] + parent_definitions.import_alias_mapping = import_alias_mapping parent_definitions.import_names = local_names module_definitions = ModuleDefinitions(local_names, module_name) self.module_definitions_stack.append(module_definitions) - self.append_node(EntryOrExitNode('Entry ' + module[0])) + self.append_node(EntryOrExitNode('Module Entry ' + module[0])) self.visit(tree) - exit_node = self.append_node(EntryOrExitNode('Exit ' + module[0])) + exit_node = self.append_node(EntryOrExitNode('Module Exit ' + module[0])) self.module_definitions_stack.pop() self.filenames.pop() return exit_node + def add_directory_module(self, module, real_names, local_names, import_alias_mapping): + module_path = module[1] + + for real_name in real_names: + file_module = (real_name, os.path.join(module_path, real_name + '.py')) + self.add_file_module(file_module, real_name, local_names, 
import_alias_mapping) + + def import_directory_module(self, module, import_alias_mapping): + module_path = module[1] + init_file_location = os.path.join(module_path, '__init__.py') + file_exists = os.path.isfile(init_file_location) + if file_exists: + raise Exception("TODO: Handle __init__ files") + else: + raise Exception("import directory needs an __init__.py file") + def visit_Import(self, node): for name in node.names: for module in self.local_modules: if name.name == module[0]: - return self.add_module(module, name.name, name.asname) + if os.path.isdir(module[1]): + return self.import_directory_module(module, + self.retrieve_import_alias_mapping(node.names)) + else: + return self.add_file_module(module, + name.name, + name.asname, + self.retrieve_import_alias_mapping(node.names)) for module in self.project_modules: if name.name == module[0]: - return self.add_module(module, name.name, name.asname) + if os.path.isdir(module[1]): + return self.import_directory_module(module, + self.retrieve_import_alias_mapping(node.names)) + else: + return self.add_file_module(module, + name.name, + name.asname, + self.retrieve_import_alias_mapping(node.names)) return IgnoredNode() - def alias_handler(self, alias_list): + def as_alias_handler(self, alias_list): l = list() for alias in alias_list: if alias.asname: @@ -419,15 +472,86 @@ def alias_handler(self, alias_list): l.append(alias.name) return l + def not_as_alias_handler(self, names_list): + l = list() + for alias in names_list: + l.append(alias.name) + return l + + def handle_relative_import(self, node): + """ + from A means node.level == 0 + from . 
import B means node.level == 1 + from .A means node.level == 1 + """ + is_file = False + + no_file = os.path.abspath(os.path.join(self.filenames[-1], os.pardir)) + + if node.level == 1: + # Same directory as current file + if node.module: + name_with_dir = os.path.join(no_file, node.module.replace('.', '/')) + if not os.path.isdir(name_with_dir): + name_with_dir = name_with_dir + '.py' + is_file = True + # e.g. from . import X + else: + name_with_dir = no_file + else: + parent = os.path.abspath(os.path.join(no_file, os.pardir)) + + if node.level > 2: + level = node.level + while level > 2: + parent = os.path.abspath(os.path.join(parent, os.pardir)) + level = level - 1 + if node.module: + name_with_dir = os.path.join(parent, node.module.replace('.', '/')) + if not os.path.isdir(name_with_dir): + name_with_dir = name_with_dir + '.py' + is_file = True + # e.g. from .. import X + else: + name_with_dir = parent + + if is_file: + return self.add_file_module((node.module, name_with_dir), None, + self.as_alias_handler(node.names), + self.retrieve_import_alias_mapping(node.names)) + else: + return self.add_directory_module((node.module, name_with_dir), + self.not_as_alias_handler(node.names), + self.as_alias_handler(node.names), + self.retrieve_import_alias_mapping(node.names)) + def visit_ImportFrom(self, node): - for module in self.local_modules: - if node.module == module[0]: - return self.add_module(module, None, - self.alias_handler(node.names)) - for module in self.project_modules: - if node.module == module[0]: - return self.add_module(module, None, - self.alias_handler(node.names)) + # Is it relative? 
+ if node.level > 0: + return self.handle_relative_import(node) + else: + for module in self.local_modules: + if node.module == module[0]: + if os.path.isdir(module[1]): + return self.add_directory_module(module, + self.not_as_alias_handler(node.names), + self.as_alias_handler(node.names)) + else: + return self.add_file_module(module, None, + self.as_alias_handler(node.names), + self.retrieve_import_alias_mapping(node.names)) + for module in self.project_modules: + name = module[0] + if node.module == name: + if os.path.isdir(module[1]): + return self.add_directory_module(module, + self.not_as_alias_handler(node.names), + self.as_alias_handler(node.names), + self.retrieve_import_alias_mapping(node.names)) + else: + return self.add_file_module(module, None, + self.as_alias_handler(node.names), + self.retrieve_import_alias_mapping(node.names)) return IgnoredNode() diff --git a/pyt/module_definitions.py b/pyt/module_definitions.py index cd27783d..c0c1c45d 100644 --- a/pyt/module_definitions.py +++ b/pyt/module_definitions.py @@ -2,7 +2,6 @@ project_definitions = dict() # Contains all project definitions for a program run - class ModuleDefinition(): """Handling of a definition.""" @@ -12,13 +11,14 @@ class ModuleDefinition(): module_definitions = None def __init__(self, local_module_definitions, name, parent_module_name, path): + self.module_definitions = local_module_definitions + self.parent_module_name = parent_module_name + self.path = path + if parent_module_name: self.name = parent_module_name + '.' + name else: self.name = name - self.path = path - - self.module_definitions = local_module_definitions def __str__(self): name = 'NoName' @@ -32,7 +32,6 @@ def __str__(self): class LocalModuleDefinition(ModuleDefinition): """A local definition.""" - pass @@ -47,24 +46,32 @@ def __init__(self, import_names=None, module_name=None): Module name should only be set when it is a normal import statement. 
""" - self.definitions = list() + self.import_names = import_names self.module_name = module_name + self.definitions = list() self.classes = list() - self.import_names = import_names + self.import_alias_mapping = {} + - def append(self, definition): + def append_if_local_or_in_imports(self, definition): """Add definition to list. - Handles localdefinitions and adds to project_definitions. + Handles local definitions and adds to project_definitions. """ if isinstance(definition, LocalModuleDefinition): self.definitions.append(definition) elif self.import_names and definition.name in self.import_names: self.definitions.append(definition) + elif self.import_alias_mapping and definition.name in self.import_alias_mapping.values(): + self.definitions.append(definition) - if not definition.node in project_definitions: + if definition.parent_module_name: + self.definitions.append(definition) + + if definition.node not in project_definitions: project_definitions[definition.node] = definition + def is_import(self): """Return whether it is a normal import statement and not a from import. 
@@ -77,8 +84,8 @@ def get_definition(self, name): for definition in self.definitions: if definition.name == name: return definition - - def set_defintion_node(self, node, name): + + def set_definition_node(self, node, name): """Set definition by name.""" definition = self.get_definition(name) if definition: @@ -88,7 +95,8 @@ def __str__(self): module = 'NoModuleName' if self.module_name: module = self.module_name - - return 'Definitions: ' + ' '.join([str(definition) for definition in self.definitions]) + '\nmodule_name: ' + module + '\n' - + if self.definitions: + return 'Definitions: "' + '", "'.join([str(definition) for definition in self.definitions]) + '" and module_name: ' + module + '\n' + else: + return 'No Definitions, module_name: ' + module + '\n' diff --git a/pyt/project_handler.py b/pyt/project_handler.py index 7993ed38..3395af41 100644 --- a/pyt/project_handler.py +++ b/pyt/project_handler.py @@ -1,19 +1,16 @@ """Generates a list of CFGs from a path. The module finds all python modules and generates an ast for them. -Then """ import ast import os -def is_python_module(path): - if os.path.splitext(path)[1] == '.py': - return True - return False - local_modules = list() def get_directory_modules(directory, flush_local_modules=False): + """Return a list containing tuples of + e.g. 
('__init__', 'example/import_test_project/__init__.py') + """ if local_modules and os.path.dirname(local_modules[0][1]) == directory: return local_modules @@ -21,24 +18,29 @@ def get_directory_modules(directory, flush_local_modules=False): del local_modules[:] if not os.path.isdir(directory): + # example/import_test_project/A.py -> example/import_test_project directory = os.path.dirname(directory) if directory == '': return local_modules for path in os.listdir(directory): - if is_python_module(path): + if is_python_file(path): + # A.py -> A module_name = os.path.splitext(path)[0] local_modules.append((module_name, os.path.join(directory, path))) return local_modules -def get_python_modules(path): +def get_modules(path): + """Return a list containing tuples of + e.g. ('test_project.utils', 'example/test_project/utils.py') + """ module_root = os.path.split(path)[1] modules = list() for root, directories, filenames in os.walk(path): for filename in filenames: - if is_python_module(filename): + if is_python_file(filename): directory = os.path.dirname(os.path.realpath(os.path.join(root, filename))).split(module_root)[-1].replace(os.sep, '.') directory = directory.replace('.', '', 1) if directory: @@ -48,16 +50,34 @@ def get_python_modules(path): return modules -def get_project_module_names(path): - project_modules = get_python_modules(path) - project_module_names = list() - for project_module in project_modules: - project_module_names.append(project_module[0]) - return project_module_names +def get_modules_and_packages(path): + """Return a list containing tuples of + e.g. 
('folder', 'example/test_project/folder', '.folder') + ('test_project.utils', 'example/test_project/utils.py') + """ + module_root = os.path.split(path)[1] + modules = list() + for root, directories, filenames in os.walk(path): + for directory in directories: + if directory != '__pycache__': + full_path = os.path.join(root, directory) + relative_path = os.path.realpath(full_path).split(module_root)[-1].replace(os.sep, '.') + # Remove the dot in front to be consistent + modules.append((relative_path[1:], full_path, relative_path)) + + for filename in filenames: + if is_python_file(filename): + full_path = os.path.join(root, filename) + directory = os.path.dirname(os.path.realpath(full_path)).split(module_root)[-1].replace(os.sep, '.') + directory = directory.replace('.', '', 1) + if directory: + modules.append(('.'.join((module_root, directory, filename.replace('.py', ''))), full_path)) + else: + modules.append(('.'.join((module_root, filename.replace('.py', ''))), full_path)) -def is_directory(path): - if os.path.isdir(path): + return modules + +def is_python_file(path): + if os.path.splitext(path)[1] == '.py': return True - elif is_python_module(path): - return False - raise Exception(path, ' has to be a python module or a directory.') + return False diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 5313d43b..3579ceb6 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -3,20 +3,20 @@ import ast from collections import namedtuple -from .base_cfg import Node, AssignmentNode, ReturnNode +from .base_cfg import AssignmentNode, Node, ReturnNode from .framework_adaptor import TaintedNode +from .lattice import Lattice +from .trigger_definitions_parser import default_trigger_word_file, parse +from .vars_visitor import VarsVisitor from .vulnerability_log import ( + SanitisedVulnerability, Vulnerability, - VulnerabilityLog, - SanitisedVulnerability + VulnerabilityLog ) -from .lattice import Lattice -from .vars_visitor import VarsVisitor -from 
.trigger_definitions_parser import default_trigger_word_file, parse -Triggers = namedtuple('Triggers', 'sources sinks sanitiser_dict') Sanitiser = namedtuple('Sanitiser', 'trigger_word cfg_node') +Triggers = namedtuple('Triggers', 'sources sinks sanitiser_dict') class TriggerNode(): @@ -51,7 +51,6 @@ def identify_triggers(cfg, sources, sinks): tainted_nodes = filter_cfg_nodes(cfg, TaintedNode) tainted_trigger_nodes = [TriggerNode('Flask function URL parameter', None, node) for node in tainted_nodes] - sources_in_file = find_triggers(assignment_nodes, sources) sources_in_file.extend(tainted_trigger_nodes) @@ -69,8 +68,6 @@ def filter_cfg_nodes(cfg, cfg_node_type): def find_secondary_sources(assignment_nodes, sources): - assignments = dict() - for source in sources: source.secondary_nodes = find_assignments(assignment_nodes, source) @@ -78,7 +75,7 @@ def find_secondary_sources(assignment_nodes, sources): def find_assignments(assignment_nodes, source): old = list() - # added in order to propagate reassignments of the source node: + # added in order to propagate reassignments of the source node new = [source.cfg_node] update_assignments(new, assignment_nodes, source.cfg_node) @@ -194,7 +191,7 @@ def is_sanitized(sink, sanitiser_dict, lattice): sanitiser_dict(dict): dictionary of sink sanitiser pairs. 
Returns: - True of False + True or False """ for sanitiser in sink.sanitisers: for cfg_node in sanitiser_dict[sanitiser]: @@ -233,14 +230,16 @@ def get_vulnerability(source, sink, triggers, lattice): secondary_in_sink = [secondary for secondary in source.secondary_nodes if lattice.in_constraint(secondary, sink.cfg_node)] + trigger_node_in_sink = source_in_sink or secondary_in_sink sink_args = get_sink_args(sink.cfg_node) source_lhs_in_sink_args = source.cfg_node.left_hand_side in sink_args\ if sink_args else None + secondary_nodes_in_sink_args = any(True for node in secondary_in_sink if node.left_hand_side in sink_args)\ - if sink_args else None + if sink_args else None lhs_in_sink_args = source_lhs_in_sink_args or secondary_nodes_in_sink_args if trigger_node_in_sink and lhs_in_sink_args: @@ -249,15 +248,15 @@ def get_vulnerability(source, sink, triggers, lattice): sink_is_sanitised = is_sanitized(sink, triggers.sanitiser_dict, lattice) - if not sink_is_sanitised: - return Vulnerability(source.cfg_node, source_trigger_word, - sink.cfg_node, sink_trigger_word, - source.secondary_nodes) - elif sink_is_sanitised: + if sink_is_sanitised: return SanitisedVulnerability(source.cfg_node, source_trigger_word, sink.cfg_node, sink_trigger_word, sink.sanitisers, source.secondary_nodes) + else: + return Vulnerability(source.cfg_node, source_trigger_word, + sink.cfg_node, sink_trigger_word, + source.secondary_nodes) return None diff --git a/pyt/vulnerability_log.py b/pyt/vulnerability_log.py index e1d843f9..f2d5a2f6 100644 --- a/pyt/vulnerability_log.py +++ b/pyt/vulnerability_log.py @@ -1,7 +1,7 @@ """This module contains a vulnerability log. This log is able to give precise information about where a vulnerability is located. -The log is printed to the standard output. +The log is printed to standard output. 
""" class VulnerabilityLog(): diff --git a/tests/base_test_case.py b/tests/base_test_case.py index 8b79bb11..db2c5c46 100644 --- a/tests/base_test_case.py +++ b/tests/base_test_case.py @@ -10,10 +10,10 @@ class BaseTestCase(unittest.TestCase): """A base class that has helper methods for testing PyT.""" def assertInCfg(self, connections): - ''' Assert that all connections in the connections list exists in the cfg, + """ Assert that all connections in the connections list exists in the cfg, as well as all connections not in the list do not exist - connections is a list of tuples where the node at index 0 of the tuple has to be in the new_constraintset of the node a index 1 of the tuple''' + connections is a list of tuples where the node at index 0 of the tuple has to be in the new_constraintset of the node a index 1 of the tuple""" for connection in connections: self.assertIn(self.cfg.nodes[connection[0]], self.cfg.nodes[connection[1]].outgoing, str(connection) + " expected to be connected") self.assertIn(self.cfg.nodes[connection[1]], self.cfg.nodes[connection[0]].ingoing, str(connection) + " expected to be connected") @@ -27,9 +27,9 @@ def assertInCfg(self, connections): self.assertNotIn(self.cfg.nodes[sets], self.cfg.nodes[element].ingoing, "(%s <- %s)" % (sets, element) + " expected to be disconnected") def assertConnected(self, node, successor): - '''Asserts that a node is connected to its successor. + """Asserts that a node is connected to its successor. 
This means that node has successor in its outgoing and - successor has node in its ingoing.''' + successor has node in its ingoing.""" self.assertIn(successor, node.outgoing, '\n%s was NOT found in the outgoing list of %s containing: ' % (successor.label, node.label) + '[' + ', '.join([x.label for x in node.outgoing]) + ']') @@ -38,9 +38,9 @@ def assertConnected(self, node, successor): '\n%s was NOT found in the ingoing list of %s containing: ' % (node.label, successor.label) + '[' + ', '.join([x.label for x in successor.ingoing]) + ']') def assertNotConnected(self, node, successor): - '''Asserts that a node is not connected to its successor. + """Asserts that a node is not connected to its successor. This means that node does not the successor in its outgoing and - successor does not have the node in its ingoing.''' + successor does not have the node in its ingoing.""" self.assertNotIn(successor, node.outgoing, '\n%s was mistakenly found in the outgoing list of %s containing: ' % (successor.label, node.label) + '[' + ', '.join([x.label for x in node.outgoing]) + ']') @@ -52,8 +52,8 @@ def assertLineNumber(self, node, line_number): self.assertEqual(node.line_number, line_number) def cfg_list_to_dict(self, list): - '''This method converts the CFG list to a dict, making it easier to find nodes to test. - This method assumes that no nodes in the code have the same label''' + """This method converts the CFG list to a dict, making it easier to find nodes to test. 
+ This method assumes that no nodes in the code have the same label""" return {x.label: x for x in list} def assert_length(self, _list, *, expected_length): diff --git a/tests/cfg_test.py b/tests/cfg_test.py index d5370ef5..7d5e7b83 100644 --- a/tests/cfg_test.py +++ b/tests/cfg_test.py @@ -1,6 +1,6 @@ from .base_test_case import BaseTestCase from pyt.base_cfg import EntryOrExitNode, Node -from pyt.project_handler import get_python_modules +# from pyt.project_handler import get_modules class CFGGeneralTest(BaseTestCase): @@ -707,9 +707,9 @@ def test_function_multiple_return(self): def test_function_line_numbers_2(self): path = 'example/example_inputs/simple_function_with_return.py' self.cfg_create_from_file(path) -# self.cfg = CFG(get_python_modules(path)) - # tree = generate_ast(path) - # self.cfg.create(tree) + # self.cfg = CFG(get_modules(path)) + # tree = generate_ast(path) + # self.cfg.create(tree) assignment_with_function = self.cfg.nodes[1] diff --git a/tests/import_test.py b/tests/import_test.py index 7312e196..4ef870d3 100644 --- a/tests/import_test.py +++ b/tests/import_test.py @@ -2,22 +2,395 @@ import os from .base_test_case import BaseTestCase -from pyt.base_cfg import get_call_names_as_string -from pyt.project_handler import get_directory_modules, get_python_modules +from pyt.ast_helper import get_call_names_as_string +from pyt.project_handler import get_directory_modules, get_modules_and_packages class ImportTest(BaseTestCase): def test_import(self): - path = os.path.normpath('example/import_test_project/main.py') + path = os.path.normpath('example/import_test_project/import.py') - project_modules = get_python_modules(os.path.dirname(path)) + project_modules = get_modules_and_packages(os.path.dirname(path)) local_modules = get_directory_modules(os.path.dirname(path)) self.cfg_create_from_file(path, project_modules, local_modules) - cfg_list = [self.cfg] + EXPECTED = ["Entry module", + "Module Entry A", + "Module Exit A", + "Module Entry A", + "Module 
Exit A", + "temp_1_s = 'str'", + "s = temp_1_s", + "Function Entry B", + "ret_B = s", + "Exit B", + "¤call_1 = ret_B", + "b = ¤call_1", + "save_2_b = b", + "temp_2_s = 'sss'", + "s = temp_2_s", + "Function Entry A.B", + "ret_A.B = s", + "Exit A.B", + "b = save_2_b", + "¤call_2 = ret_A.B", + "c = ¤call_2", + "Exit module"] - #adaptor_type = FlaskAdaptor(cfg_list) + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + + def test_import_as(self): + path = os.path.normpath('example/import_test_project/import_as.py') + + project_modules = get_modules_and_packages(os.path.dirname(path)) + local_modules = get_directory_modules(os.path.dirname(path)) + + self.cfg_create_from_file(path, project_modules, local_modules) + + EXPECTED = ["Entry module", + "Module Entry A", + "Module Exit A", + "Module Entry A", + "Module Exit A", + "temp_1_s = 'str'", + "s = temp_1_s", + "Function Entry B", + "ret_B = s", + "Exit B", + "¤call_1 = ret_B", + "b = ¤call_1", + "save_2_b = b", + "temp_2_s = 'sss'", + "s = temp_2_s", + "Function Entry A.B", + "ret_foo.B = s", + "Exit A.B", + "b = save_2_b", + "¤call_2 = ret_foo.B", + "c = ¤call_2", + "Exit module"] + + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + + def test_from_directory(self): + file_path = os.path.normpath('example/import_test_project/from_directory.py') + project_path = os.path.normpath('example/import_test_project') + + project_modules = get_modules_and_packages(project_path) + local_modules = get_directory_modules(project_path) + + self.cfg_create_from_file(file_path, project_modules, local_modules) + + + EXPECTED = ["Entry module", + "Module Entry bar", + "Module Exit bar", + "temp_1_s = 'hey'", + "s = temp_1_s", + "Function Entry bar.H", + "ret_bar.H = s + 'end'", + "Exit bar.H", + "¤call_1 = ret_bar.H", + "Exit module"] + + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + 
self.assertEqual(node.label, expected_label) + + def test_relative_level_1(self): + path = os.path.normpath('example/import_test_project/relative_level_1.py') + + project_modules = get_modules_and_packages(os.path.dirname(path)) + local_modules = get_directory_modules(os.path.dirname(path)) + + self.cfg_create_from_file(path, project_modules, local_modules) + + EXPECTED = ["Entry module", + "Module Entry A", + "Module Exit A", + "Module Entry A", + "Module Exit A", + "temp_1_s = 'str'", + "s = temp_1_s", + "Function Entry B", + "ret_B = s", + "Exit B", + "¤call_1 = ret_B", + "b = ¤call_1", + "save_2_b = b", + "temp_2_s = 'sss'", + "s = temp_2_s", + "Function Entry A.B", + "ret_A.B = s", + "Exit A.B", + "b = save_2_b", + "¤call_2 = ret_A.B", + "c = ¤call_2", + "Exit module"] + + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + + def test_relative_level_2(self): + path = os.path.normpath('example/import_test_project/relative_level_2.py') + + project_modules = get_modules_and_packages(os.path.dirname(path)) + local_modules = get_directory_modules(os.path.dirname(path)) + + try: + self.cfg_create_from_file(path, project_modules, local_modules) + except Exception as e: + self.assertTrue("OSError('Input needs to be a file. 
Path: " in repr(e)) + self.assertTrue("example/A.py" in repr(e)) + + def test_relative_between_folders(self): + file_path = os.path.normpath('example/import_test_project/other_dir/relative_between_folders.py') + project_path = os.path.normpath('example/import_test_project') + + project_modules = get_modules_and_packages(project_path) + local_modules = get_directory_modules(project_path) + + self.cfg_create_from_file(file_path, project_modules, local_modules) + + EXPECTED = ["Entry module", + "Module Entry foo.bar", + "Module Exit foo.bar", + "temp_1_s = 'hey'", + "s = temp_1_s", + "Function Entry H", + "ret_H = s + 'end'", + "Exit H", + "¤call_1 = ret_H", + "result = ¤call_1", + "Exit module"] + + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + + def test_relative_from_directory(self): + file_path = os.path.normpath('example/import_test_project/relative_from_directory.py') + project_path = os.path.normpath('example/import_test_project') + + project_modules = get_modules_and_packages(project_path) + local_modules = get_directory_modules(project_path) + + self.cfg_create_from_file(file_path, project_modules, local_modules) + + EXPECTED = ["Entry module", + "Module Entry bar", + "Module Exit bar", + "temp_1_s = 'hey'", + "s = temp_1_s", + "Function Entry bar.H", + "ret_bar.H = s + 'end'", + "Exit bar.H", + "¤call_1 = ret_bar.H", + "Exit module"] + + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + + def test_from_dot(self): + file_path = os.path.normpath('example/import_test_project/from_dot.py') + project_path = os.path.normpath('example/import_test_project') + + project_modules = get_modules_and_packages(project_path) + local_modules = get_directory_modules(project_path) + + self.cfg_create_from_file(file_path, project_modules, local_modules) + + EXPECTED = ['Entry module', + 'Module Entry A', + 'Module Exit A', + 'temp_1_s = \'sss\'', + 's = 
temp_1_s', + 'Function Entry A.B', + 'ret_A.B = s', + 'Exit A.B', + '¤call_1 = ret_A.B', + 'c = ¤call_1', + 'Exit module'] + + + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + + def test_from_dot_dot(self): + file_path = os.path.normpath('example/import_test_project/other_dir/from_dot_dot.py') + project_path = os.path.normpath('example/import_test_project') + + project_modules = get_modules_and_packages(project_path) + local_modules = get_directory_modules(project_path) + + self.cfg_create_from_file(file_path, project_modules, local_modules) + + EXPECTED = ['Entry module', + 'Module Entry A', + 'Module Exit A', + 'temp_1_s = \'sss\'', + 's = temp_1_s', + 'Function Entry A.B', + 'ret_A.B = s', + 'Exit A.B', + '¤call_1 = ret_A.B', + 'c = ¤call_1', + 'Exit module'] + + + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + + def test_multiple_files_with_aliases(self): + file_path = os.path.normpath('example/import_test_project/multiple_files_with_aliases.py') + project_path = os.path.normpath('example/import_test_project') + + project_modules = get_modules_and_packages(project_path) + local_modules = get_directory_modules(project_path) + + self.cfg_create_from_file(file_path, project_modules, local_modules) + + EXPECTED = ["Entry module", + "Module Entry A", + "Module Exit A", + "Module Entry B", + "Module Exit B", + "Module Entry C", + "Module Exit C", + "Module Entry D", + "Module Exit D", + "temp_1_s = 'tlayuda'", + "s = temp_1_s", + "Function Entry A.cosme", + "ret_A.cosme = s + 'aaa'", + "Exit A.cosme", + "¤call_1 = ret_A.cosme", + "a = ¤call_1", + "save_2_a = a", + "temp_2_s = 'mutton'", + "s = temp_2_s", + "Function Entry B.foo", + "ret_keens.foo = s + 'bee'", + "Exit B.foo", + "a = save_2_a", + "¤call_2 = ret_keens.foo", + "b = ¤call_2", + "save_3_a = a", + "save_3_b = b", + "temp_3_s = 'tasting'", + "s = temp_3_s", + "Function Entry C.foo", + 
"ret_per_se.foo = s + 'see'", + "Exit C.foo", + "a = save_3_a", + "b = save_3_b", + "¤call_3 = ret_per_se.foo", + "c = ¤call_3", + "save_4_a = a", + "save_4_b = b", + "save_4_c = c", + "temp_4_s = 'peking'", + "s = temp_4_s", + "Function Entry D.foo", + "ret_duck_house.foo = s + 'dee'", + "Exit D.foo", + "a = save_4_a", + "b = save_4_b", + "c = save_4_c", + "¤call_4 = ret_duck_house.foo", + "d = ¤call_4", + "Exit module"] + + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + + def test_multiple_functions_with_aliases(self): + file_path = os.path.normpath('example/import_test_project/multiple_functions_with_aliases.py') + project_path = os.path.normpath('example/import_test_project') + + project_modules = get_modules_and_packages(project_path) + local_modules = get_directory_modules(project_path) + + self.cfg_create_from_file(file_path, project_modules, local_modules) + + EXPECTED = ["Entry module", + "Module Entry A", + "Module Exit A", + "temp_1_s = 'mutton'", + "s = temp_1_s", + "Function Entry B", + "ret_keens = s", + "Exit B", + "¤call_1 = ret_keens", + "a = ¤call_1", + "save_2_a = a", + "temp_2_s = 'tasting'", + "s = temp_2_s", + "Function Entry C", + "ret_C = s + 'see'", + "Exit C", + "a = save_2_a", + "¤call_2 = ret_C", + "b = ¤call_2", + "save_3_a = a", + "save_3_b = b", + "temp_3_s = 'peking'", + "s = temp_3_s", + "Function Entry D", + "ret_duck_house = s + 'dee'", + "Exit D", + "a = save_3_a", + "b = save_3_b", + "¤call_3 = ret_duck_house", + "c = ¤call_3", + "Exit module"] + + + for node, expected_label in zip(self.cfg.nodes, EXPECTED): + self.assertEqual(node.label, expected_label) + + # def test_init(self): + # file_path = os.path.normpath('example/import_test_project/init.py') + # project_path = os.path.normpath('example/import_test_project') + + # project_modules = get_modules_and_packages(project_path) + # local_modules = get_directory_modules(project_path) + + # 
self.cfg_create_from_file(file_path, project_modules, local_modules) + + # EXPECTED = ['Not Yet'] + + # for node, expected_label in zip(self.cfg.nodes, EXPECTED): + # self.assertEqual(node.label, expected_label) + + # def test_all(self): + # file_path = os.path.normpath('example/import_test_project/all.py') + # project_path = os.path.normpath('example/import_test_project') + + # project_modules = get_modules_and_packages(project_path) + # local_modules = get_directory_modules(project_path) + + # self.cfg_create_from_file(file_path, project_modules, local_modules) + + # EXPECTED = ['Not Yet'] + + # for node, expected_label in zip(self.cfg.nodes, EXPECTED): + # self.assertEqual(node.label, expected_label) + + # def test_no_all(self): + # file_path = os.path.normpath('example/import_test_project/no_all.py') + # project_path = os.path.normpath('example/import_test_project') + + # project_modules = get_modules_and_packages(project_path) + # local_modules = get_directory_modules(project_path) + + # self.cfg_create_from_file(file_path, project_modules, local_modules) + + # EXPECTED = ['Not Yet'] + + # for node, expected_label in zip(self.cfg.nodes, EXPECTED): + # self.assertEqual(node.label, expected_label) def test_get_call_names_single(self): m = ast.parse('hi(a)') @@ -35,7 +408,6 @@ def test_get_call_names_uselesscase(self): self.assertEqual(result, 'defg.hi') - def test_get_call_names_multi(self): m = ast.parse('abc.defg.hi(a)') call = m.body[0].value diff --git a/tests/label_visitor_test.py b/tests/label_visitor_test.py index b4a6e837..6ee44bb4 100644 --- a/tests/label_visitor_test.py +++ b/tests/label_visitor_test.py @@ -5,7 +5,7 @@ class LabelVisitorTestCase(unittest.TestCase): - '''Baseclass for LabelVisitor tests''' + """Baseclass for LabelVisitor tests""" def perform_labeling_on_expression(self, expr): obj = ast.parse(expr) diff --git a/tests/nested_functions_test.py b/tests/nested_functions_test.py new file mode 100644 index 00000000..07b6cb15 --- /dev/null +++ 
b/tests/nested_functions_test.py @@ -0,0 +1,20 @@ +# import os.path + +# from .base_test_case import BaseTestCase +# from pyt.project_handler import get_directory_modules, get_modules_and_packages + + +# class NestedTest(BaseTestCase): + # def test_nested_function_calls(self): + + # path = os.path.normpath('example/nested_functions_code/nested_function_calls.py') + + # project_modules = get_modules_and_packages(os.path.dirname(path)) + # local_modules = get_directory_modules(os.path.dirname(path)) + + # self.cfg_create_from_file(path, project_modules, local_modules) + + # EXPECTED = ['Not Yet'] + + # for node, expected_label in zip(self.cfg.nodes, EXPECTED): + # self.assertEqual(node.label, expected_label) diff --git a/tests/project_handler_test.py b/tests/project_handler_test.py index dfe1b67b..9abb990f 100644 --- a/tests/project_handler_test.py +++ b/tests/project_handler_test.py @@ -2,34 +2,38 @@ import unittest from pprint import pprint -from pyt.project_handler import get_python_modules, is_python_module - +from pyt.project_handler import ( + get_modules, + get_modules_and_packages, + is_python_file +) class ProjectHandlerTest(unittest.TestCase): """Tests for the project handler.""" - def test_is_python_module(self): + def test_is_python_file(self): python_module = './project_handler_test.py' not_python_module = '../.travis.yml' - self.assertEqual(is_python_module(python_module), True) - self.assertEqual(is_python_module(not_python_module), False) + self.assertEqual(is_python_file(python_module), True) + self.assertEqual(is_python_file(not_python_module), False) - def test_get_python_modules(self): + def test_get_modules(self): project_folder = os.path.normpath(os.path.join('example', 'test_project')) project_namespace = 'test_project' folder = 'folder' directory = 'directory' - modules = get_python_modules(project_folder) + modules = get_modules(project_folder) app_path = os.path.join(project_folder, 'app.py') utils_path = 
os.path.join(project_folder,'utils.py') exceptions_path = os.path.join(project_folder, 'exceptions.py') some_path = os.path.join(project_folder, folder, 'some.py') - indhold_path = os.path.join(project_folder, folder, 'indhold.py') + indhold_path = os.path.join(project_folder, folder, directory, 'indhold.py') + relative_folder_name = '.' + folder app_name = project_namespace + '.' + 'app' utils_name = project_namespace + '.' + 'utils' exceptions_name = project_namespace + '.' + 'exceptions' @@ -40,10 +44,55 @@ def test_get_python_modules(self): utils_tuple = (utils_name, utils_path) exceptions_tuple = (exceptions_name, exceptions_path) some_tuple = (some_name, some_path) + indhold_tuple = (indhold_name, indhold_path) self.assertIn(app_tuple, modules) self.assertIn(utils_tuple, modules) self.assertIn(exceptions_tuple, modules) self.assertIn(some_tuple, modules) + self.assertIn(indhold_tuple, modules) self.assertEqual(len(modules), 5) + + def test_get_modules_and_packages(self): + project_folder = os.path.normpath(os.path.join('example', 'test_project')) + + project_namespace = 'test_project' + folder = 'folder' + directory = 'directory' + + modules = get_modules_and_packages(project_folder) + + folder_path = os.path.join(project_folder, folder) + app_path = os.path.join(project_folder, 'app.py') + exceptions_path = os.path.join(project_folder, 'exceptions.py') + utils_path = os.path.join(project_folder,'utils.py') + directory_path = os.path.join(project_folder, folder, directory) + some_path = os.path.join(project_folder, folder, 'some.py') + indhold_path = os.path.join(project_folder, folder, directory, 'indhold.py') + + relative_folder_name = '.' + folder + app_name = project_namespace + '.' + 'app' + exceptions_name = project_namespace + '.' + 'exceptions' + utils_name = project_namespace + '.' + 'utils' + relative_directory_name = '.' + folder + '.' + directory + some_name = project_namespace + '.' + folder + '.some' + indhold_name = project_namespace + '.' 
+ folder + '.' + directory + '.indhold' + + folder_tuple = (relative_folder_name[1:], folder_path, relative_folder_name) + app_tuple = (app_name, app_path) + exceptions_tuple = (exceptions_name, exceptions_path) + utils_tuple = (utils_name, utils_path) + directory_tuple = (relative_directory_name[1:], directory_path, relative_directory_name) + some_tuple = (some_name, some_path) + indhold_tuple = (indhold_name, indhold_path) + + self.assertIn(folder_tuple, modules) + self.assertIn(app_tuple, modules) + self.assertIn(exceptions_tuple, modules) + self.assertIn(utils_tuple, modules) + self.assertIn(directory_tuple, modules) + self.assertIn(some_tuple, modules) + self.assertIn(indhold_tuple, modules) + + self.assertEqual(len(modules), 7) diff --git a/tests/vars_visitor_test.py b/tests/vars_visitor_test.py index 4df37acf..9232b016 100644 --- a/tests/vars_visitor_test.py +++ b/tests/vars_visitor_test.py @@ -5,7 +5,7 @@ class VarsVisitorTestCase(unittest.TestCase): - '''Baseclass for VarsVisitor tests''' + """Baseclass for VarsVisitor tests""" def perform_vars_on_expression(self, expr): obj = ast.parse(expr) diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py new file mode 100644 index 00000000..224324a0 --- /dev/null +++ b/tests/vulnerabilities_across_files_test.py @@ -0,0 +1,69 @@ +import ast +import os + +from .base_test_case import BaseTestCase +from pyt import trigger_definitions_parser, vulnerabilities +from pyt.ast_helper import get_call_names_as_string +from pyt.base_cfg import Node +from pyt.constraint_table import constraint_table, initialize_constraint_table +from pyt.fixed_point import analyse +from pyt.flask_adaptor import FlaskAdaptor +from pyt.lattice import Lattice +from pyt.project_handler import get_directory_modules, get_modules +from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis + + +class EngineTest(BaseTestCase): + def run_analysis(self, path): + path = os.path.normpath(path) +
project_modules = get_modules(os.path.dirname(path)) + local_modules = get_directory_modules(os.path.dirname(path)) + + self.cfg_create_from_file(path, project_modules, local_modules) + + cfg_list = [self.cfg] + + FlaskAdaptor(cfg_list, [], []) + + initialize_constraint_table(cfg_list) + + analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) + + return vulnerabilities.find_vulnerabilities(cfg_list, ReachingDefinitionsTaintAnalysis) + + def test_find_vulnerabilities_absolute_from_file_command_injection(self): + vulnerability_log = self.run_analysis('example/vulnerable_code_across_files/absolute_from_file_command_injection.py') + + self.assert_length(vulnerability_log.vulnerabilities, expected_length=1) + + def test_find_vulnerabilities_absolute_from_file_command_injection_2(self): + vulnerability_log = self.run_analysis('example/vulnerable_code_across_files/absolute_from_file_command_injection_2.py') + self.assert_length(vulnerability_log.vulnerabilities, expected_length=1) + + def test_no_false_positive_absolute_from_file_command_injection_3(self): + vulnerability_log = self.run_analysis('example/vulnerable_code_across_files/no_false_positive_absolute_from_file_command_injection_3.py') + self.assert_length(vulnerability_log.vulnerabilities, expected_length=0) + + # This fails due to a false positive in get_vulnerability + # def test_absolute_from_file_does_not_exist(self): + # vulnerability_log = self.run_analysis('example/vulnerable_code_across_files/absolute_from_file_does_not_exist.py') + # self.assert_length(vulnerability_log.vulnerabilities, expected_length=0) + + def test_find_vulnerabilities_import_file_command_injection(self): + vulnerability_log = self.run_analysis('example/vulnerable_code_across_files/import_file_command_injection.py') + + self.assert_length(vulnerability_log.vulnerabilities, expected_length=1) + + def test_find_vulnerabilities_import_file_command_injection_2(self): + vulnerability_log = 
self.run_analysis('example/vulnerable_code_across_files/import_file_command_injection_2.py') + self.assert_length(vulnerability_log.vulnerabilities, expected_length=1) + + def test_no_false_positive_import_file_command_injection_3(self): + vulnerability_log = self.run_analysis('example/vulnerable_code_across_files/no_false_positive_import_file_command_injection_3.py') + self.assert_length(vulnerability_log.vulnerabilities, expected_length=0) + + # This fails due to a false positive in get_vulnerability + # def test_import_file_does_not_exist(self): + # vulnerability_log = self.run_analysis('example/vulnerable_code_across_files/import_file_does_not_exist.py') + # self.assert_length(vulnerability_log.vulnerabilities, expected_length=0) diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index 0b80576e..47483e35 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -179,3 +179,11 @@ def test_find_vulnerabilities_variable_assign_no_vuln(self): def test_find_vulnerabilities_command_injection(self): vulnerability_log = self.run_analysis('example/vulnerable_code/command_injection.py') self.assert_length(vulnerability_log.vulnerabilities, expected_length=1) + + def test_find_vulnerabilities_inter_command_injection(self): + vulnerability_log = self.run_analysis('example/vulnerable_code/inter_command_injection.py') + self.assert_length(vulnerability_log.vulnerabilities, expected_length=1) + + def test_find_vulnerabilities_inter_command_injection_2(self): + vulnerability_log = self.run_analysis('example/vulnerable_code/inter_command_injection_2.py') + self.assert_length(vulnerability_log.vulnerabilities, expected_length=1)