diff --git a/README.md b/README.md index 014b118..e4b55f1 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,24 @@ out_put = core_extractor.extractor (r"path_to_repo/code") print(out_put) ``` +- To exclude specific files from a repository. + +```sh +from functiondefextractor import core_extractor +out_put = core_extractor.extractor (r"path_to_repo/code", + regex_pattern=r'*\test\*, *.java') +print(out_put) +``` + +Sample regex patterns: (Note: replace # with *) + +1. '#.java' => to exclude all java files in a repository. + +2. '#/test/#' => to exclude test folder and files in it. + +3. '#/src/#/*.cpp' => to exclude all cpp files in src and + its sub directories + - To extract functions based on annotation. ```sh @@ -81,10 +99,10 @@ print(out_put) For example to search assert, suppress warnings patterns. ```sh -from functiondefextractor import core_extractor +from functiondefextractor import condition_checker -out_put = core_extractor.check_condition +out_put = condition_checker.check_condition ("@SupressWarning", r"path_to_excelfile/dataframe", "(") -print(out_put) +print(out_put[0], out_put[1]) ``` ### Commandline @@ -95,11 +113,18 @@ print(out_put) >>>python -m functiondefextractor.extractor_cmd --p path/to/repo ``` +- To ignore files from the repo using a regex pattern. + +```sh +>>>python -m functiondefextractor.extractor_cmd --p path/to/repo + --i '*.java, *.cpp' +``` + - To analyse various patterns in the code based on given condition.
```sh >>>python -m functiondefextractor.extractor_cmd - --c "@Assert" --e path/to/excel/dataframe --s "(" + --c "Assert" --e path/to/excel --s "(" ``` - Help option can be found at, @@ -108,6 +133,58 @@ print(out_put) >>>python -m functiondefextractor.extractor_cmd -h ``` +### Sample use cases + +- To extract all functions from a repository + +```sh +>>>python -m functiondefextractor.extractor_cmd --p path/to/repo +``` + +```sh +from functiondefextractor import core_extractor +out_put = core_extractor.extractor (r"path_to_repo/code") +print(out_put) +``` + +- To extract all functions with "@Test" annotation + excluding all ".cpp" files in the repository + +```sh +>>>python -m functiondefextractor.extractor_cmd --p path/to/repo + --a "@Test" --i '*.cpp' +``` + +```sh +from functiondefextractor import core_extractor +out_put = core_extractor.extractor( + r"path_to_repo/code", annot="@Test", regex_pattern=r'*.cpp') +print(out_put) +``` + +Note: + +1. functionstartwith argument can be used to specifically extract code +from required functions whose names start with "test_" or whatever name +the user is interested in. + +2. delta and annot arguments together can be used to extract the required number +of lines below and above the given annotation/keyword. + +- To analyze various patterns present in extracted code + +```sh +>>>python -m functiondefextractor.extractor_cmd + --c "Assert" --e path/to/excel --s "(" +``` + +```sh +from functiondefextractor import condition_checker +out_put = condition_checker.check_condition( + "@SupressWarning", r"path_to_excelfile/dataframe", "(") +print(out_put[0], out_put[1]) +``` + ### Output - Executing functiondefextractor to extract functions from @@ -117,9 +194,10 @@ print(out_put) - Using functiondefextractor to extract functions from code would return a dataframe with same content as excel file.
-- When functiondefextractor is executed to analyse patterns in code, an excel file - with multiple sheets would be generated which contains the requested patterns and - pivot table. Also an html file with pivot table of the same would be generated. +- When functiondefextractor is executed from script to analyse patterns in code, + a tuple with 2 data frames would be generated which contains the requested pattern + statements with their count in various functions and a pivot table of the + same respectively. ## Contact diff --git a/build_scripts/dependencies_static_analysis_test_cov.py b/build_scripts/dependencies_static_analysis_test_cov.py index 32c4c59..6b85fba 100644 --- a/build_scripts/dependencies_static_analysis_test_cov.py +++ b/build_scripts/dependencies_static_analysis_test_cov.py @@ -90,7 +90,7 @@ def check_dead_code(): """ checks the repo for dead code with minimum confidence 100 """ - call_subprocess("python3 -m vulture --min-confidence 60 " + call_subprocess("python -m vulture --min-confidence 60 " "functiondefextractor test build_scripts whitelist.py") print("Stage dead code detection -- COMPLETED & PASSED --") diff --git a/functiondefextractor/condition_checker.py b/functiondefextractor/condition_checker.py index 7082792..e30ef3d 100644 --- a/functiondefextractor/condition_checker.py +++ b/functiondefextractor/condition_checker.py @@ -21,18 +21,18 @@ def check_condition(condition, file_path_dataframe, splitter=None): test_assert = condition if ['Uniq ID'] not in data.columns.ravel(): return "Couldn't find Uniq ID column" - data = pd.DataFrame(data, columns=['Uniq ID', 'Code']).set_index("Uniq ID") + data = pd.DataFrame(data, columns=['Uniq ID', 'Code']) specifier_column = [] spe_data = "" for i in range(len(data)): - for line in str(data.iat[i, 0]).splitlines(): + for line in str(data.iat[i, 1]).splitlines(): if test_assert.upper() in line.strip().upper(): spe_data = spe_data + line.strip() + os.linesep specifier_column.append(spe_data) spe_data = 
"" data['Count of %s in function' % test_assert] = data["Code"].str.upper().str.count(test_assert.upper()) data["%s Statements" % test_assert] = specifier_column - get_pivot_table_result(data, test_assert, splitter, file_path_dataframe) + return get_pivot_table_result(data, test_assert, splitter, file_path_dataframe) def get_pivot_table_result(data, test_assert, splitter, file_path): @@ -47,16 +47,20 @@ def get_pivot_table_result(data, test_assert, splitter, file_path): data["%s Statements" % test_assert] = data["%s Statements" % test_assert].apply(lambda x: x.split(splitter)[0]) data_table = data.groupby("%s Statements" % test_assert).count().iloc[:, 1] data_table = data_table.to_frame() - data_table = data_table.rename({'Count of %s in function' % test_assert: - 'Different %s pattern counts' % test_assert}, axis='columns') + data_table = data_table.rename({'Code': 'Different %s pattern counts' % test_assert}, axis='columns') data_table = data_table.reset_index() data_table["%s Statements" % test_assert] = data_table["%s Statements" % test_assert].str.wrap(200) if data_table.iat[0, 0] == '': # pragma: no mutate data_table = data_table.drop([data_table.index[0]]) - html_file_path = os.path.join(os.path.dirname(file_path), 'Pivot_table_%s.html') % test_assert.strip("@") - writer = pd.ExcelWriter(os.path.join(os.path.dirname(file_path), 'Pattern_Result_%s.xlsx') - % test_assert.strip("@"), engine='xlsxwriter') - data.to_excel(writer, sheet_name='Data') # pragma: no mutate - data_table.to_excel(writer, sheet_name='Pivot Table') # pragma: no mutate - data_table.to_html(html_file_path) - writer.save() + if not isinstance(file_path, pd.DataFrame): # report files are only written for a path input + html_file_path = os.path.join(os.path.dirname(file_path), 'Pivot_table_%s.html') % test_assert.strip("@") + writer = pd.ExcelWriter(os.path.join(os.path.dirname(file_path), 'Pattern_Result_%s.xlsx') + % test_assert.strip("@"), engine='xlsxwriter') + data.to_excel(writer, sheet_name='Data') # pragma: no mutate + data_table.to_excel(writer, 
sheet_name='Pivot Table') # pragma: no mutate + data_table.to_html(html_file_path) + writer.save() + ret_val = "Report files successfully generated at input path" + else: + ret_val = data, data_table + return ret_val diff --git a/functiondefextractor/core_extractor.py b/functiondefextractor/core_extractor.py index 73a765f..d33c412 100644 --- a/functiondefextractor/core_extractor.py +++ b/functiondefextractor/core_extractor.py @@ -1,6 +1,7 @@ """Koninklijke Philips N.V., 2019 - 2020. All rights reserved.""" import datetime +import fnmatch import subprocess import os import re @@ -13,7 +14,7 @@ LOG = cl.get_logger() DELTA_BODY = [] UID_LIST = [] -FILE_TYPE = [".java", ".cpp", ".c", ".cs", ".py", ".ts", ".js"] # pragma: no mutate +FILE_TYPE = ["JAVA", "CPP", "C", "CS", "PY", "TS", "JS"] # pragma: no mutate def get_file_names(dir_path): @@ -33,6 +34,29 @@ def get_file_names(dir_path): return allfiles +def filter_reg_files(allfiles, reg_pattern): + """ Function used to filter requested file patterns + from the files in the given directory + @parameters + allfiles: list of all files in the repository + reg_pattern: comma separated wildcard patterns of files to exclude + @return + This function returns filtered files in the given directory""" + cmd = "" # pragma: no mutate + regex, filtered_files = [], [] + if reg_pattern is None: + filtered_files = allfiles + else: + reg_pattern = [pat.strip() for pat in reg_pattern.split(",")] # allow "a, b" style lists + for i in range(len(reg_pattern).__trunc__()): + cmd = "{} " + cmd + regex.append(fnmatch.translate(reg_pattern[i])) + cmd = "(" + cmd[:-1].replace(" ", "|") + ")" # pragma: no mutate + re_obj = re.compile(cmd.format(*regex)) + filtered_files = [fname for fname in allfiles if re_obj.match(fname) is None] + return filtered_files + + def run_ctags_cmd(file_ext, file_names, find): """ Function to execute ctags command @parameters @@ -42,11 +66,11 @@ def run_ctags_cmd(file_ext, file_names, find): @return This function returns ctags output""" if file_ext.upper() == "PY": - cmd = 'ctags -x "%s"' % 
file_names - elif file_ext.upper() in ["TS", "JS"]: - cmd = 'ctags --language-force=java -x "%s" | grep %s' % (file_names, find) + cmd = 'ctags -x "%s"' % file_names # pragma: no mutate + elif file_ext.upper() in ["TS", "JS"]: # pragma: no mutate + cmd = 'ctags --language-force=java -x "%s" | grep %s' % (file_names, find) # pragma: no mutate else: - cmd = 'ctags -x "%s" | grep %s' % (file_names, find) + cmd = 'ctags -x "%s" | grep %s' % (file_names, find) # pragma: no mutate proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) return proc @@ -57,23 +81,21 @@ def get_function_names(file_names): file_names: Path to the file @return This function returns function/method names and line numbers of all the given files""" - file_ext = str(os.path.basename(file_names).split('.')[1]) + file_ext = file_names.split('.')[-1].upper() find = "function" if file_ext.upper() == "CPP" or file_ext.upper() == "C" \ else ["member", "function", "class"] if file_ext.upper() == "PY" else "method" # pragma: no mutate proc = run_ctags_cmd(file_ext, file_names, find) process = str(proc.stdout.read(), 'utf-8') - return process_function_names(process, find, file_names) + return process_function_names(process, find) -def process_function_names(func_data, find, file_names): +def process_function_names(func_data, find): """ This function cleans the ctags output to get function/method names and line numbers @parameters func_data: Ctags output find: keyword of method type(member/function/class/method) @return This function returns list of function names and line numbers""" - if func_data.strip() == "": # pragma: no mutate - LOG.info("ctags: Warning: cannot open input file %s", file_names) # pragma: no mutate if func_data is not None: process_list = re.findall(r'\w+', func_data) val = [index for index, _ in enumerate(process_list) if @@ -207,7 +229,7 @@ def check_py_annot(file_name, annot): This function returns function/method names that has the given annotation""" line_data = list( 
[line.rstrip() for line in open(file_name, encoding='utf-8', errors='ignore')]) # pragma: no mutate - val = 0 + val = 1 if annot.upper() == "TEST_": # Making use of annotation search function for function start with feature too annot = "def test_" val = -1 @@ -369,9 +391,9 @@ def filter_files(list_files): This function returns the list of required file(.java, .cpp, .c, .cs, .py) paths """ local_files = [] for files in list_files: - extension = os.path.splitext(files) + extension = files.split('.')[-1].upper() if len(extension).__trunc__() > 0: - if extension[1] in FILE_TYPE: + if extension in FILE_TYPE: local_files.append(files) return local_files @@ -430,7 +452,7 @@ def process_delta_lines_data(): UID_LIST.clear() mask = data_frame['Uniq ID'].duplicated(keep=False) data_frame.loc[mask, 'Uniq ID'] += data_frame.groupby('Uniq ID').cumcount().add(1).astype(str) - return data_frame.set_index("Uniq ID").sort_values('Uniq ID') + return data_frame.sort_values('Uniq ID') def process_final_data(code_list): @@ -443,7 +465,7 @@ def process_final_data(code_list): data_frame = pd.DataFrame.from_dict(data, orient='index') data_frame = data_frame.transpose() UID_LIST.clear() - return data_frame.set_index("Uniq ID").sort_values('Uniq ID') + return data_frame def process_py_files(code_list, line_num, func_name, annot): @@ -502,16 +524,6 @@ def clean_log(): open(file_name, 'w').close() -def get_log_data(line): - """ function to get the line requested from log data""" - ini_path = os.path.abspath(os.path.join - (os.path.dirname(__file__), os.pardir)) - file_name = os.path.join(ini_path, "functiondefextractor", "extractor.log") - file_variable = open(file_name, encoding='utf-8', errors='ignore') # pragma: no mutate - all_lines_variable = file_variable.readlines() - return all_lines_variable[-line] - - def remove_comments(dataframe): """ This function removes comments from the code extracted @parameters @@ -521,7 +533,7 @@ def remove_comments(dataframe): filtered_code = [] data = "" 
for i in range(len(dataframe).__trunc__()): - for line in dataframe.iat[i, 0].splitlines(): + for line in dataframe.iat[i, 1].splitlines(): if not line.strip().startswith(("#", "//", "/*", "*", "*/")): # pragma: no mutate data = data + line.strip().split(";")[0] + os.linesep filtered_code.append(data) @@ -538,11 +550,11 @@ def get_report(data, path): method_data = [[] for _ in range(len(FILE_TYPE))] method_name = [[] for _ in range(len(FILE_TYPE))] for i in range(len(data).__trunc__()): - extension = os.path.splitext(data.index[i]) - res = str([ext for ext in FILE_TYPE if ext == str(extension[1]).split("_")[0].lower()]) + extension = data["Uniq ID"][i].split('.')[-1].upper() # pragma: no mutate + res = str([ext for ext in FILE_TYPE if ext == str(extension).split("_")[0].upper()]) if str(res) != "[]": # pragma: no mutate - method_data[int(FILE_TYPE.index(res.strip("[]''")))].append(data.iat[i, 0]) # pylint: disable=E1310 - method_name[int(FILE_TYPE.index(res.strip("[]''")))].append(data.index[i]) # pylint: disable=E1310 + method_data[int(FILE_TYPE.index(res.strip("[]''")))].append(data.iat[i, 1]) # pylint: disable=E1310 + method_name[int(FILE_TYPE.index(res.strip("[]''")))].append(data.iat[i, 0]) # pylint: disable=E1310 return write_report_files(path, method_name, method_data) @@ -556,28 +568,28 @@ def write_report_files(path, method_name, method_data): returns a dataframe with all the extracted method names and definitions""" for i in range(len(FILE_TYPE).__trunc__()): dataframe = pd.DataFrame(list(zip(method_name[i], method_data[i])), - columns=['Uniq ID', 'Code']).set_index("Uniq ID") + columns=['Uniq ID', 'Code']) if len(dataframe).__trunc__() != 0: - writer = pd.ExcelWriter('%s.xlsx' % os.path.join(path, "ExtractedFunc_" + str(FILE_TYPE[i]).strip(".") - + "_" + str(datetime.datetime.fromtimestamp(time.time()). 
- strftime('%H-%M-%S_%d_%m_%Y'))), + writer = pd.ExcelWriter('%s.xlsx' % # pragma: no mutate + os.path.join(path, "ExtractedFunc_" + str(FILE_TYPE[i]).strip( # pragma: no mutate + ".") + "_" + str(datetime.datetime. # pragma: no mutate + fromtimestamp(time.time()) + .strftime('%H-%M-%S_%d_%m_%Y'))), # pragma: no mutate engine='xlsxwriter') # pragma: no mutate dataframe.to_excel(writer, sheet_name="funcDefExtractResult") writer.save() - return pd.DataFrame(list(zip(method_name, method_data)), columns=['Uniq ID', 'Code']).set_index("Uniq ID") + return pd.DataFrame(list(zip(method_name, method_data)), columns=['Uniq ID', 'Code']) def validate_input_paths(path): """This function helps in validating the user inputs""" + ret_val = None status_path = os.path.exists(path) + if status_path: + ret_val = False if not status_path: - print("Enter Valid Path", path) # pragma: no mutate - LOG.info("Enter valid path %s", path) # pragma: no mutate - sys.stdout.flush() - script = None # pragma: no mutate - cmd = 'python %s --h' % script - subprocess.call(cmd, shell=True) # pragma: no mutate - return "Enter valid path" + ret_val = True + return ret_val def initialize_values(delta, annot, path_loc, report_folder, functionstartwith): @@ -604,7 +616,7 @@ def initialize_values(delta, annot, path_loc, report_folder, functionstartwith): return report_folder, annot -def extractor(path_loc, annot=None, delta=None, functionstartwith=None, report_folder=None): +def extractor(path_loc, annot=None, delta=None, functionstartwith=None, report_folder=None, regex_pattern=None): """ Function that initiates the overall process of extracting function/method definitions from the files @parameters path_loc: directory path of the repository @@ -623,8 +635,8 @@ def extractor(path_loc, annot=None, delta=None, functionstartwith=None, report_f else: report_folder, annot = initialize_values(delta, annot, path_loc, report_folder, functionstartwith) code_list = [] - for func_name in 
filter_files(get_file_names(path_loc)): - LOG.info("Extracting %s", func_name) # pragma: no mutate + for func_name in filter_files(filter_reg_files(get_file_names(path_loc), regex_pattern)): + LOG.info("Extracting %s", func_name) # pragma: no mutate if delta is not None: get_delta_lines(func_name, annot, delta) else: @@ -633,11 +645,8 @@ def extractor(path_loc, annot=None, delta=None, functionstartwith=None, report_f code_list = process_py_files(code_list, line_num, func_name, annot) else: code_list = process_input_files(line_num, functions, annot, func_name, code_list) - if "Warning:" in get_log_data(1): - print("Failed to extracted %s", func_name) # pragma: no mutate - else: - LOG.info("Successfully extracted %s", func_name) # pragma: no mutate end = time.time() - LOG.info("Extraction process took %s minutes", round((end - start) / 60, 3)) # pragma: no mutate - LOG.info("%s vaild files has been analysed", len(filter_files(get_file_names(path_loc)))) # pragma: no mutate + LOG.info("Extraction process took %s minutes", round((end - start) / 60, 3)) # pragma: no mutate + LOG.info("%s vaild files has been analysed", # pragma: no mutate + len(filter_files(filter_reg_files(get_file_names(path_loc), regex_pattern)))) # pragma: no mutate return remove_comments(get_final_dataframe(delta, code_list)) diff --git a/functiondefextractor/extractor_cmd.py b/functiondefextractor/extractor_cmd.py index 8750e02..213e08d 100644 --- a/functiondefextractor/extractor_cmd.py +++ b/functiondefextractor/extractor_cmd.py @@ -21,7 +21,7 @@ def create_parser(args): func_parser.add_argument('--path', metavar='--p', type=str, - help='the Input folder path') + help='The Input repository path') func_parser.add_argument('--annot', metavar='--a', @@ -39,7 +39,7 @@ def create_parser(args): metavar='--f', type=str, default=None, - help='functions starting with given key word') + help='Functions starting with given key word') func_parser.add_argument('--reportpath', metavar='--r', @@ -47,6 +47,12 
@@ def create_parser(args): default=None, help='Input report folder path') + func_parser.add_argument('--ignorefiles', + metavar='--i', + type=str, + default=None, + help='Regex pattern of files to be ignored') + func_parser.add_argument('--excelfilepath', metavar='--e', type=str, @@ -57,13 +63,13 @@ def create_parser(args): metavar='--c', type=str, default=None, - help='condition to analyse against extracted methods') + help='Condition to analyse against extracted methods') func_parser.add_argument('--splitter', metavar='--s', type=str, default=None, - help='key to split the extracted statements to generate a pivot table for easy analysis') + help='Key to split the extracted statements to generate a pivot table for easy analysis') # ...Create your parser as you like... return func_parser.parse_args(args) @@ -95,7 +101,7 @@ def validate_inputs(arg_path, repo): validate_inputs(ARGS.path, "repository") ARGS.reportpath = ARGS.path if ARGS.reportpath is None else ARGS.reportpath validate_inputs(ARGS.reportpath, "report folder") # pragma: no mutate - get_report(extractor(ARGS.path, ARGS.annot, ARGS.delta, ARGS.funcstartwith, ARGS.reportpath) + get_report(extractor(ARGS.path, ARGS.annot, ARGS.delta, ARGS.funcstartwith, ARGS.reportpath, ARGS.ignorefiles) , ARGS.reportpath) else: validate_inputs(ARGS.excelfilepath, "Excel file") diff --git a/functiondefextractor/extractor_log.py b/functiondefextractor/extractor_log.py index 9dce6a2..4e88778 100644 --- a/functiondefextractor/extractor_log.py +++ b/functiondefextractor/extractor_log.py @@ -9,7 +9,7 @@ def get_logger(): logging.basicConfig(filename=os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, "functiondefextractor", "extractor.log")), - format='%(asctime)s %(message)s', filemode='a') + format='%(asctime)s %(message)s', filemode='a') # pragma: no mutate # Creating log Object __logger = logging.getLogger() # Setting the threshold of logger to DEBUG diff --git a/spell_check/spell_ignore_md.txt 
b/spell_check/spell_ignore_md.txt index 9a04920..1ce22aa 100644 --- a/spell_check/spell_ignore_md.txt +++ b/spell_check/spell_ignore_md.txt @@ -75,3 +75,6 @@ xlrd xlsxwriter FileName Supresswarnings +cpp +regex +functionstartwith diff --git a/spell_check/spell_ignore_py.txt b/spell_check/spell_ignore_py.txt index 3b01ad8..7d7460f 100644 --- a/spell_check/spell_ignore_py.txt +++ b/spell_check/spell_ignore_py.txt @@ -19,3 +19,5 @@ dir inputed splitter fname +allfiles + diff --git a/test/test_core_extractor.py b/test/test_core_extractor.py index 79153cd..6251030 100644 --- a/test/test_core_extractor.py +++ b/test/test_core_extractor.py @@ -1,13 +1,14 @@ """Koninklijke Philips N.V., 2019 - 2020. All rights reserved.""" import os +import subprocess import unittest from unittest.mock import patch from test.test_resource import TestResource import pandas as pd from condition_checker import check_condition -from core_extractor import get_file_names, get_report, get_log_data +from core_extractor import get_file_names, get_report, filter_reg_files from core_extractor import get_function_names from core_extractor import get_func_body from core_extractor import extractor @@ -15,6 +16,16 @@ from extractor_cmd import validate_inputs +def get_log_data(line): + """ function to get the line requested from log data""" + ini_path = os.path.abspath(os.path.join + (os.path.dirname(__file__), os.pardir)) + file_name = os.path.join(ini_path, "functiondefextractor", "extractor.log") + file_variable = open(file_name, encoding='utf-8', errors='ignore') # pragma: no mutate + all_lines_variable = file_variable.readlines() + return all_lines_variable[-line] + + class SimpleTest(unittest.TestCase): """Class to run unit test cases on the function definition extractor test""" src_files = os.path.join(TestResource.tst_resource_folder, "test_repo", "src") @@ -29,6 +40,14 @@ def test_get_file_names(self): os.path.join(self.src_files, "python_file.py")] self.assertEqual(expected.sort(), files.sort()) 
+ def test_filter_reg_files(self): + """Function to test filter_reg_files method""" + files = get_file_names(self.src_files) + filter_files = filter_reg_files(files, r'*.py, *.cpp') + expected = [os.path.join(self.src_files, "HelloController.java"), os.path.join(self.src_files, "test_c.c"), + os.path.join(self.src_files, "test_repo.java")] + self.assertEqual(expected.sort(), filter_files.sort()) + def test_get_function_names(self): """Function to test get_function_names method""" func, line_num = get_function_names(os.path.join(self.src_files, "HelloController.java")) @@ -45,70 +64,46 @@ def test_get_func_body(self): expec_func_body = "publicvoidafterAll(){super.restoreStreams();}" self.assertEqual(expec_func_body, func_body_formated) - @staticmethod - def __write_xlsx(data_f, name): - """ Function which write the dataframe to xlsx """ - curr_path = ( - os.path.join((os.path.join(os.path.dirname(__file__), os.pardir)).split("test")[0], "test_resource")) - file_path = os.path.join(curr_path, name) - writer = pd.ExcelWriter('%s.xlsx' % file_path, engine='xlsxwriter') - data_f.to_excel(writer, sheet_name=name) - writer.save() - def test_process_ad(self): """Function to test the complete end to end process of function definition extractor with Annotation and delta)""" - dataframe = extractor((os.path.join(self.file_path, "test_resource", "test_repo")), "@Test", "5") - self.__write_xlsx(dataframe, "expeccodeextractor_T_T_A_D") - df1_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", - "expeccodeextractor_T_T_A_D.xlsx")).sort_values('Uniq ID') + dataframe = extractor((os.path.join(self.file_path, "test_resource", "test_repo")), annot="@Test", delta="5") df2_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "codeextractor_T_T_A_D.xlsx")).sort_values('Uniq ID') - df1_list["Code"] = df1_list["Code"].str.replace(os.linesep, "") + dataframe["Code"] = dataframe["Code"].str.replace(os.linesep, "") 
df2_list["Code"] = df2_list["Code"].str.replace("\n", "") - self.assertTrue(df1_list["Code"].equals(df2_list["Code"])) - os.remove(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", - "expeccodeextractor_T_T_A_D.xlsx")) + self.assertTrue(dataframe["Code"].equals(df2_list["Code"])) def test_process_extract(self): """Function to test the complete end to end process of function definition extractor""" dataframe = extractor((os.path.join(self.file_path, "test_resource", "test_repo")), None, None) - self.__write_xlsx(dataframe, "expeccodeextractor_T_T_A") - df1_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", - "expeccodeextractor_T_T_A.xlsx")).sort_values('Uniq ID') df2_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "codeextractor_T_T_A.xlsx")).sort_values('Uniq ID') - df1_list["Code"] = df1_list["Code"].str.replace(os.linesep, "") + dataframe["Code"] = dataframe["Code"].str.replace(os.linesep, "") df2_list["Code"] = df2_list["Code"].str.replace(os.linesep, "") df2_list["Code"] = df2_list["Code"].str.replace("\r", "") - self.assertEqual(df1_list["Code"].values.tolist().sort(), df2_list["Code"].values.tolist().sort()) - os.remove(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "expeccodeextractor_T_T_A.xlsx")) + self.assertEqual(dataframe["Code"].values.tolist().sort(), df2_list["Code"].values.tolist().sort()) def test_process_annot(self): """Function to test the complete end to end process of function definition extractor (True False annotation)""" - dataframe = extractor((os.path.join(self.file_path, "test_resource", "test_repo")), "@Test", None) - self.__write_xlsx(dataframe, "expeccodeextractor_annot") - df1_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", - "expeccodeextractor_annot.xlsx")).sort_values('Uniq ID') + dataframe = extractor((os.path.join(self.file_path, "test_resource", "test_repo")), 
annot="@Test", + report_folder=None) df2_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", - "codeextractor_annot.xlsx")).sort_values('Uniq ID') - df1_list["Code"] = df1_list["Code"].str.replace(os.linesep, "") - df2_list["Code"] = df2_list["Code"].str.replace("\n", "") - self.assertTrue(df1_list["Code"].equals(df2_list["Code"])) - os.remove(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "expeccodeextractor_annot.xlsx")) + "codeextractor_annot.xlsx")) + dataframe["Code"] = dataframe["Code"].str.replace(os.linesep, "") + df2_list["Code"] = df2_list["Code"].str.replace("\r\n", "") + self.assertTrue(dataframe["Code"].equals(df2_list["Code"])) def test_process_python_test_extract(self): """Function to test the complete end to end process of function definition extractor (True True)""" - dataframe = extractor((os.path.join(self.file_path, "test_resource", "test_repo")), "test_", None) - self.__write_xlsx(dataframe, "expeccodeextractor_T_T") - df1_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", - "expeccodeextractor_T_T.xlsx")).sort_values('Uniq ID') + dataframe = extractor((os.path.join(self.file_path, "test_resource", "test_repo")), functionstartwith="test_", + report_folder=None) df2_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", - "codeextractor_T_T.xlsx")).sort_values('Uniq ID') - df1_list["Code"] = df1_list["Code"].str.replace(os.linesep, "") - df2_list["Code"] = df2_list["Code"].str.replace("\n", "") - self.assertTrue(df1_list["Code"].equals(df2_list["Code"])) - os.remove(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "expeccodeextractor_T_T.xlsx")) + "codeextractor_T_T.xlsx")) + print(dataframe) + dataframe["Code"] = dataframe["Code"].str.replace(os.linesep, "") + df2_list["Code"] = df2_list["Code"].str.replace("\r\n", "") + self.assertTrue(dataframe["Code"].equals(df2_list["Code"])) def 
test_invalid_path(self): """Function to test valid input path""" @@ -119,18 +114,21 @@ def test_py_annot_method_names(self): line_data = list([line.rstrip() for line in open(os.path.join(self.src_files, "python_annot_file.py"), encoding='utf-8', errors='ignore')]) self.assertEqual(str(get_py_annot_method_names(line_data, "@staticmethod", 0)), "['validate_return']") + file_dir = os.path.join(self.file_path, "test_resource", "test_repo", "test") + for file in os.listdir(file_dir): + if file.startswith("ExtractedFunc_"): + os.remove(os.path.join(file_dir, file)) def test_get_report(self): """Function to test report generated""" dataframe = get_report(extractor((os.path.join(self.file_path, "test_resource", "test_repo")), None, None), (os.path.join(os.path.dirname(__file__), os.pardir, "test_resource"))) - self.__write_xlsx(dataframe, "Expec_Extracted_methods") df1_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", - "Extracted_methods.xlsx")).sort_values('Uniq ID') - df2_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", - "Expec_Extracted_methods.xlsx")).sort_values('Uniq ID') - self.assertEqual(len(df1_list["Code"]), len(df2_list["Code"])) - os.remove(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "Expec_Extracted_methods.xlsx")) + "codeextractor_T_T_A.xlsx")).sort_values('Uniq ID') + dataframe["Code"] = dataframe["Code"].str.replace(os.linesep, "") + df1_list["Code"] = df1_list["Code"].str.replace(os.linesep, "") + df1_list["Code"] = df1_list["Code"].str.replace("\r", "") + self.assertEqual(dataframe["Code"].values.tolist().sort(), df1_list["Code"].values.tolist().sort()) my_dir = os.path.join(os.path.dirname(__file__), os.pardir, "test_resource") for fname in os.listdir(my_dir): if fname.startswith("ExtractedFunc_"): @@ -160,27 +158,46 @@ def test_check_condition(self): def test_pivot_table(self): """Function to test pivot table""" - check_condition("assert", - 
os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "Pivot_test.xlsx"), "(") + res = check_condition("assert", + os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "Pivot_test.xlsx"), + "(") df1_pivot_table = pd.read_html(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "Test_Pivot_table_assert.html")) df2_pivot_table = pd.read_html(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "Pivot_table_assert.html")) self.assertEqual(df1_pivot_table[0].replace(r'\\r', '', regex=True).values.tolist(), df2_pivot_table[0].replace(r'\\r', '', regex=True).values.tolist()) + self.assertEqual(res, "Report files successfully generated at input path") self.assertEqual(str(df1_pivot_table[0].columns), str(df2_pivot_table[0].columns)) os.remove(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "Pattern_Result_assert.xlsx")) os.remove(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "Pivot_table_assert.html")) def test_cmd_inputs(self): """Function to test command line input validation function""" - validate_inputs(os.getcwd(), "sample_path") + validate_inputs((os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", + "codeextractor_T_T.xlsx")), "Excel file") self.assertTrue("Input path validated" in get_log_data(1).strip()) with patch('sys.exit') as exit_mock: validate_inputs("no/path", "sample_path") self.assertTrue("Enter valid sample_path path" in get_log_data(1).strip()) assert exit_mock + def test_extractor_cmd(self): + """Function to test command line working""" + cmd = 'python -m functiondefextractor.extractor_cmd --p "%s"' \ + % (os.path.join(self.file_path, "test_resource", "test_repo", "test")) + subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + my_dir = os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", "test_repo", "test") + for fname in os.listdir(my_dir): + if fname.startswith("ExtractedFunc_"): + df1_list = 
pd.read_excel(fname).sort_values('Uniq ID') + df2_list = pd.read_excel(os.path.join(os.path.dirname(__file__), os.pardir, "test_resource", + "Extracted_java.xlsx")).sort_values('Uniq ID') + df1_list["Code"] = df1_list["Code"].str.replace(os.linesep, "") + df2_list["Code"] = df2_list["Code"].str.replace(os.linesep, "") + df2_list["Code"] = df2_list["Code"].str.replace("\r", "") + self.assertEqual(df1_list["Code"].values.tolist().sort(), df2_list["Code"].values.tolist().sort()) + if __name__ == '__main__': unittest.main() diff --git a/test_resource/Extracted_java.xlsx b/test_resource/Extracted_java.xlsx new file mode 100644 index 0000000..164c8a4 Binary files /dev/null and b/test_resource/Extracted_java.xlsx differ diff --git a/test_resource/Pattern_Result.xlsx b/test_resource/Pattern_Result.xlsx index f5fde15..cfc877c 100644 Binary files a/test_resource/Pattern_Result.xlsx and b/test_resource/Pattern_Result.xlsx differ diff --git a/test_resource/cmd_help.txt b/test_resource/cmd_help.txt index d9d4198..92202be 100644 --- a/test_resource/cmd_help.txt +++ b/test_resource/cmd_help.txt @@ -1,20 +1,21 @@ usage: extractor_cmd.py [-h] [--path --p] [--annot --a] [--delta --d] [--funcstartwith --f] [--reportpath --r] - [--excelfilepath --e] [--conditionchecker --c] - [--splitter --s] + [--ignorefiles --i] [--excelfilepath --e] + [--conditionchecker --c] [--splitter --s] Function Definition Extractor optional arguments: -h, --help show this help message and exit - --path --p the Input folder path + --path --p The Input repository path --annot --a Annotation condition to get function/method definitions --delta --d Required number of lines at annotated method - --funcstartwith --f functions starting with given key word + --funcstartwith --f Functions starting with given key word --reportpath --r Input report folder path + --ignorefiles --i Regex pattern of files to be ignored --excelfilepath --e Input excel file path/dataframe --conditionchecker --c - condition to analyse 
against extracted methods - --splitter --s key to split the extracted statements to generate a + Condition to analyse against extracted methods + --splitter --s Key to split the extracted statements to generate a pivot table for easy analysis diff --git a/test_resource/cmd_validate.txt b/test_resource/cmd_validate.txt index 16b1213..a5e8f73 100644 --- a/test_resource/cmd_validate.txt +++ b/test_resource/cmd_validate.txt @@ -1,21 +1,22 @@ Enter valid repository path usage: extractor_cmd.py [-h] [--path --p] [--annot --a] [--delta --d] [--funcstartwith --f] [--reportpath --r] - [--excelfilepath --e] [--conditionchecker --c] - [--splitter --s] + [--ignorefiles --i] [--excelfilepath --e] + [--conditionchecker --c] [--splitter --s] Function Definition Extractor optional arguments: -h, --help show this help message and exit - --path --p the Input folder path + --path --p The Input repository path --annot --a Annotation condition to get function/method definitions --delta --d Required number of lines at annotated method - --funcstartwith --f functions starting with given key word + --funcstartwith --f Functions starting with given key word --reportpath --r Input report folder path + --ignorefiles --i Regex pattern of files to be ignored --excelfilepath --e Input excel file path/dataframe --conditionchecker --c - condition to analyse against extracted methods - --splitter --s key to split the extracted statements to generate a + Condition to analyse against extracted methods + --splitter --s Key to split the extracted statements to generate a pivot table for easy analysis diff --git a/test_resource/codeextractor_T_T.xlsx b/test_resource/codeextractor_T_T.xlsx index eda6602..9e95c9f 100644 Binary files a/test_resource/codeextractor_T_T.xlsx and b/test_resource/codeextractor_T_T.xlsx differ diff --git a/test_resource/codeextractor_annot.xlsx b/test_resource/codeextractor_annot.xlsx index ee67e7d..0f48b5b 100644 Binary files a/test_resource/codeextractor_annot.xlsx and 
b/test_resource/codeextractor_annot.xlsx differ