-
Notifications
You must be signed in to change notification settings - Fork 7
/
core_extractor.py
681 lines (599 loc) · 28.8 KB
/
core_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
"""Koninklijke Philips N.V., 2019 - 2020. All rights reserved."""
import datetime
import fnmatch
import subprocess
import os
import re
import sys
import time
import pandas as pd
import extractor_log as cl
# Module-wide logger obtained from the project's extractor_log helper.
LOG = cl.get_logger()
# Delta-line snippets; process_delta_lines_body appends one joined snippet per annotation hit.
DELTA_BODY = []
# Unique ids of the extracted code; appended to during extraction and cleared
# once a dataframe has been generated from it.
UID_LIST = []
# Upper-cased file extensions that filter_files keeps and get_report groups by.
FILE_TYPE = ["JAVA", "CPP", "C", "CS", "PY", "TS", "JS"] # pragma: no mutate
def get_file_names(dir_path):
    """ Function used for collecting the paths of all files below the given directory
    @parameters
    dir_path: Path to the repository
    @return
    This function returns a list holding the path of every file found while walking dir_path"""
    collected = []
    for root, _, names in os.walk(dir_path):
        for name in names:
            collected.append(os.path.join(root, name))
    return collected
def filter_reg_files(allfiles, reg_pattern):
    """ Function used to drop the files that match the requested exclusion patterns
    @parameters
    allfiles: list of all files in the repository
    reg_pattern: comma separated shell-style patterns (fnmatch syntax), or None
    @return
    This function returns allfiles itself when reg_pattern is None, otherwise a new list
    with the files whose path matches none of the patterns"""
    if reg_pattern is None:
        return allfiles
    # Each shell-style pattern becomes a regex; one alternation matches any of them.
    translated = [fnmatch.translate(pattern) for pattern in reg_pattern.split(",")]
    exclude_re = re.compile("(" + "|".join(translated) + ")")
    return [path for path in allfiles if exclude_re.match(path) is None]
def run_ctags_cmd(file_ext, file_names, find):
    """ Function to execute ctags command
    The command is started from an argument list instead of a shell string, so characters
    in the file path (quotes, "$", ";", ...) are never interpreted by a shell.
    @parameters
    file_ext: file type(py, java etc)
    file_names: path to file
    find: keyword the ctags output is filtered with (not used for python files)
    @return
    This function returns the started process; its stdout pipe carries the ctags output
    (for non python files: the output lines that passed the grep filter)
    @raises
    OSError (FileNotFoundError) when the ctags or grep executable cannot be started"""
    extension = file_ext.upper()
    ctags_cmd = ["ctags"]
    if extension in ("TS", "JS"):
        # TS/JS sources are parsed with the java parser of ctags.
        ctags_cmd.append("--language-force=java")
    ctags_cmd.extend(["-x", file_names])
    ctags_proc = subprocess.Popen(ctags_cmd, stdout=subprocess.PIPE)
    if extension == "PY":
        # Python output is returned unfiltered; the caller selects the kinds it needs.
        return ctags_proc
    grep_proc = subprocess.Popen(["grep", find], stdin=ctags_proc.stdout,
                                 stdout=subprocess.PIPE)
    # Drop the parent's copy of the pipe so ctags is signalled if grep exits first.
    ctags_proc.stdout.close()
    return grep_proc
def get_function_names(file_names):
    """ Function to get method/function names from the input file
    @parameters
    file_names: Path to the file
    @return
    This function returns function/method names and line numbers found in the given file"""
    file_ext = file_names.split('.')[-1].upper()
    if file_ext in ("CPP", "C"):
        find = "function" # pragma: no mutate
    elif file_ext == "PY":
        find = ["member", "function", "class"] # pragma: no mutate
    else:
        find = "method" # pragma: no mutate
    proc = run_ctags_cmd(file_ext, file_names, find)
    # communicate() drains the pipe, closes it and waits for the child process.
    raw_output, _ = proc.communicate()
    # Undecodable bytes are skipped, matching how the source files themselves are read.
    process = raw_output.decode('utf-8', errors='ignore') # pragma: no mutate
    return process_function_names(process, find)
def process_function_names(func_data, find):
    """ This function cleans the ctags output to get function/method names and line numbers
    @parameters
    func_data: Ctags output
    find: keyword of method type(member/function/class/method); a list of keywords for python
    @return
    This function returns the function names ordered by line number and the sorted line numbers.
    The process is terminated with exit code 1 when func_data is None"""
    if func_data is None:
        print("Input files doesn't have valid methods") # pragma: no mutate
        sys.exit(1) # pragma: no mutate
    process_list = re.findall(r'\w+', func_data)
    if find == ["member", "function", "class"]: # pragma: no mutate
        # A line number token needs a kind token and a name token in front of it, so the
        # scan starts at index 2; starting at 0 would wrap around to the end of the list.
        val = [index for index in range(2, len(process_list))
               if process_list[index - 1] in find and process_list[index].isdigit()]
    else:
        val = process_ctags_output(find, process_list)
    function_list = get_sorted_func_list(process_list, val)
    line_numbers = get_func_line_num_list(process_list, val)
    line_numbers.sort()
    return function_list, line_numbers
def process_ctags_output(find, process_list):
    """ This function locates the line number tokens of the requested kind in the ctags output
    @parameters
    process_list: Ctags output in list format
    find: keyword of method type(function/method)
    @return
    This function returns the indexes of all numeric tokens that directly follow the keyword"""
    # The scan starts at index 2 because a line number needs a kind token and a name token
    # in front of it; this also keeps "index - 1" from wrapping around to the last token.
    return [index for index in range(2, len(process_list))
            if process_list[index - 1] == find and process_list[index].isdigit()]
def get_sorted_func_list(process_list, val):
    """ This function orders the function names by the line number they were found at
    @parameters
    process_list: Ctags output in list format
    val: indexes of the line number tokens inside process_list
    @return
    This function returns list of function names sorted by their line numbers"""
    line_numbers = get_func_line_num_list(process_list, val)
    names = get_func_list(process_list, val)
    ordered_pairs = sorted(zip(line_numbers, names))
    return [name for _, name in ordered_pairs]
def get_func_list(process_list, val):
    """ This function refines the ctags output to get function names
    @parameters
    process_list: Ctags output in list format
    val: indexes of the line number tokens inside process_list
    @return
    This function returns list of function names (the token two positions before each index)"""
    return [process_list[index - 2] for index in val]
def get_func_line_num_list(process_list, val):
    """ This function refines the ctags output to get function line numbers
    @parameters
    process_list: Ctags output in list format
    val: indexes of the line number tokens inside process_list
    @return
    This function returns list of function line numbers as integers, in the order of val"""
    return [int(process_list[index]) for index in val]
def check_annot(filename, line_num, annot):
    """ Function selects the extraction routine depending on the annotation condition
    @parameters
    filename: Path to the file
    line_num: function/method line number
    annot: Annotation condition (Ex: @Test), or None to extract without a condition
    @return
    This function returns the function/method definition; with an annotation given it is
    the result of the annotation lookup for that function/method"""
    if annot is None:
        return get_func_body(filename, line_num)
    return get_annot_methods(filename, line_num, annot)
def get_file_content(filename):
    """ Function reads the given file
    @parameters
    filename: Path to the file
    @return
    This function returns the lines of the file as a list (line endings kept);
    bytes that are not valid utf-8 are skipped"""
    with open(filename, encoding='utf-8', errors='ignore') as source: # pragma: no mutate
        return list(source)
def get_annot_methods(filename, line_num, annot):
    """ This function gets the method at line_num when it carries the given annotation
    @parameters
    filename: Path to the file
    line_num: function/method line number
    annot: Annotation condition (Ex: @Test)
    @return
    This function returns the annotation line followed by the function/method definition,
    or None when the lines above the function do not carry the annotation"""
    file_content = get_file_content(filename)
    first_index = int(line_num) - 2 # index of the line directly above the function
    try:
        # Walk upwards from the line above the function.
        # NOTE(review): index 0 (the first line of the file) is never inspected.
        for index in range(first_index, 0, -1): # pragma: no mutate
            data = str(file_content[index]).strip()
            ret_val = process_annot_method_body(annot, data, filename, line_num)
            if ret_val != "continue": # pragma: no mutate
                return ret_val
    except IndexError as exc:
        # line_num may be an int, so it is converted before being joined to the file name.
        LOG.info("error while processing file_line: %s", filename + "_" + str(line_num)) # pragma: no mutate
        LOG.info(exc) # pragma: no mutate
    return None
def process_annotation(annot):
    """ This function splits off the delimiting characters of the given annotation
    @parameters
    annot: Annotation condition (Ex: @Test)
    @return
    This function returns the first character of the annotation and its last character;
    the last character is replaced by None when it is alphabetic"""
    first_char, last_char = annot[0], annot[-1]
    return first_char, (None if last_char.isalpha() else last_char) # pragma: no mutate
def process_annot_method_body(annot, data, filename, line_num):
    """ This function checks one line above a function/method for the given annotation
    @parameters
    annot: Annotation condition (Ex: @Test)
    data: the line that is inspected
    filename: Path to the file
    line_num: function/method line number
    @return
    This function returns the line plus the function/method definition when the line
    carries the annotation, None when the line is a lone brace, and "continue" otherwise"""
    annot_start, annot_end = process_annotation(annot)
    wanted = annot.strip(annot_start).strip(annot_end).upper()
    candidates = data.strip(annot_start).strip(annot_end).upper().split(",")
    if wanted in candidates and data.strip().startswith(annot_start):
        body = get_func_body(filename, line_num)
        return data + os.linesep + str("" if body is None else body) # pragma: no mutate
    stripped = str(data).strip()
    # A lone brace ends the search for annotations.
    if (data[:1] != "@" and stripped == "}") or stripped == "{": # pragma: no mutate
        return None # pragma: no mutate
    return "continue" # pragma: no mutate
def check_py_annot(file_name, annot):
    """ Function checks for the annotation condition in python files
    @parameters
    file_name: Path to the file
    annot: Annotation condition (Ex: @Test) or the start of a function name
    @return
    This function returns function/method names that has the given annotation"""
    # The file is read inside a context manager so the handle is always closed.
    with open(file_name, encoding='utf-8', errors='ignore') as source: # pragma: no mutate
        line_data = [line.rstrip() for line in source]
    val = 1 # pragma: no mutate
    if annot[0] != "@": # Making use of annotation search function for function start with feature too
        annot = "def " + annot.lower() # pragma: no mutate
        val = -1 # pragma: no mutate
    return get_py_annot_method_names(line_data, annot, val)
def get_py_annot_method_names(line_data, annot, val):
    """ Function collects the names of the python functions related to the given annotation
    @parameters
    line_data: File content in list format
    annot: Text that is searched for in every line (Ex: @Test)
    val: offset used when the matching line itself is a "def" line; the name is then
         taken from the line at position (match + 1 + val)
    @return
    This function returns function/method names that has the given annotation"""
    def starts_with_def(text):
        return str(text).strip().split(" ")[0] == "def" # pragma: no mutate

    def name_of(text):
        return text.strip().split(" ")[1].split("(")[0] # pragma: no mutate

    names = []
    for position, line in enumerate(line_data):
        if annot not in line:
            continue
        if starts_with_def(line):
            names.append(name_of(line_data[position + 1 + val]))
            continue
        # The match is not a def line: take the first def line at or after it.
        for candidate in line_data[position:]:
            if starts_with_def(candidate):
                names.append(name_of(candidate))
                break
    return names
def get_func_body(filename, line_num):
    """ Function to get a brace delimited method/function body from a file
    @parameters
    filename: Path to the file
    line_num: function/method line number (1 based)
    @return
    This function returns the text from line_num up to the line on which the curly braces
    opened since line_num are balanced again, or None when that never happens"""
    first_line = int(line_num)
    collected = []
    depth = 0
    seen_open = False
    with open(filename, "r", encoding='utf-8', errors='ignore') as source: # pragma: no mutate
        for number, text in enumerate(source, start=1):
            if number < first_line:
                continue
            collected.append(text)
            opened = text.count("{")
            if opened:
                seen_open = True
                depth += opened
            depth -= text.count("}")
            if seen_open and depth == 0:
                return "".join(collected)
    return None
def get_py_func_body(line_numbers, file_name, annot):
    """ Function to get method/function bodies from a python file
    @parameters
    line_numbers: function/method line numbers
    file_name: Path to the file
    annot: Annotation condition (Ex: @Test), or None to keep every function
    @return
    This function returns the names and the definitions of the python functions/methods;
    both lists are empty when nothing is left after filtering"""
    func_name, func_body = [], []
    # The file is read inside a context manager so the handle is always closed.
    with open(file_name, encoding='utf-8', errors='ignore') as source: # pragma: no mutate
        line_data = [line.rstrip() for line in source]
    data, data_func_name = process_py_methods(file_name, line_numbers, line_data)
    if annot is not None:
        data_func_name, data = get_py_annot_methods(file_name, data_func_name, data, annot)
    if data_func_name:
        func_name, func_body = process_py_func_body(data, data_func_name)
    return func_name, func_body
def process_py_methods(file_name, line_numbers, line_data):
    """ This Function cuts the file content into one chunk per definition and drops class
    and lambda chunks
    @parameters
    file_name: Path to the file
    line_numbers: line numbers (1 based) at which the definitions start
    line_data: File content in list format
    @return
    This function returns the kept chunks and their ids (file name + "_" + second word of
    the start line up to "("); chunks that start with "class" or contain "lambda" are left out"""
    data = []
    data_func_name = []
    last_position = len(line_numbers) - 1
    for position, start in enumerate(line_numbers):
        # A chunk reaches up to the line before the next definition (or the end of the file).
        stop = len(line_data) if position == last_position else line_numbers[position + 1] - 1
        body = os.linesep.join(line_data[start - 1:stop]) # pragma: no mutate
        name = line_data[start - 1].strip().split(" ")[1].split("(")[0]
        if body.startswith("class") or "lambda" in body:
            continue
        data.append(body)
        data_func_name.append(str(file_name) + "_" + str(name))
    return data, data_func_name
def get_py_annot_methods(file_name, data_func_name, data, annot):
    """ This function filters the python functions to get methods that have given annotation
    @parameters
    file_name: Path to the file
    data_func_name: ids of all functions in the file (file name + "_" + function name)
    data: definitions of all functions in the file, parallel to data_func_name
    annot: Annotation condition (Ex: @staticmethod)
    @return
    This function returns python function/method names and definitions that have the given annotation"""
    annot_meth_name = []
    annot_meth_body = []
    for wanted in check_py_annot(file_name, annot):
        # Compare complete ids: a substring test would also hit the file path part of
        # the id or any longer function name that merely contains the wanted name.
        expected_uid = str(file_name) + "_" + str(wanted)
        for uid, body in zip(data_func_name, data):
            if str(uid) == expected_uid:
                annot_meth_body.append(body)
                annot_meth_name.append(uid)
                break
    return annot_meth_name, annot_meth_body
def process_py_func_body(data_body, data_name):
    """ This function tidies the collected python function definitions
    @parameters
    data_body: list of all function definitions in the file (updated in place)
    data_name: list of all function names in the file
    @return
    This function returns the names unchanged and the definitions without empty lines,
    cleaned by clean_py_methods"""
    for position, body in enumerate(data_body):
        non_empty = [text for text in str(body).splitlines() if text]
        data_body[position] = os.linesep.join(non_empty) # pragma: no mutate
    return data_name, clean_py_methods(data_body)
def clean_py_methods(data_body):
    """ This function cuts every collected python definition at the first following line
    that is not indented deeper than the first line of the definition
    @parameters
    data_body: list of all function definitions in the file (updated in place)
    @return
    This function returns data_body with every entry reduced to its first line plus the
    directly following lines that are indented deeper than that first line"""
    def indent_of(text):
        return len(text) - len(text.lstrip())

    for position, body in enumerate(data_body):
        lines = str(body).split(os.linesep)
        base_indent = indent_of(lines[0])
        kept = [lines[0]]
        for following in lines[1:]:
            if indent_of(following) <= base_indent: # pragma: no mutate
                break
            kept.append(following)
        data_body[position] = os.linesep.join(kept)
    return data_body
def filter_files(list_files):
    """ Function to filter required files from list of all files
    @parameters
    list_files: List of all files that the given repository contains
    @return
    This function returns the list of file paths whose extension is listed in FILE_TYPE """
    local_files = []
    for files in list_files:
        # splitext only looks at the file name itself, so an extension-less file that is
        # merely called "c" or "py" is no longer mistaken for a source file.
        extension = os.path.splitext(files)[1].lstrip(".").upper() # pragma: no mutate
        if extension in FILE_TYPE:
            local_files.append(files)
    return local_files
def get_delta_lines(file_name, annot, delta):
    """ Function to get + and - delta number of lines from the annoted method/function
    Empty lines are dropped before the lines are counted; a file that cannot be read is
    logged and skipped.
    @parameters
    file_name: Path to the file
    annot: Required annotation
    delta: Required lines from method """
    try:
        # The file is read inside a context manager so the handle is always closed.
        with open(file_name, encoding='utf-8', errors='ignore') as source: # pragma: no mutate
            line_data = [text for text in (line.rstrip() for line in source) if text]
        for num, line in enumerate(line_data, 1):
            # Every line gets its own, empty collector for the delta lines.
            process_delta_lines_body(annot, line, delta, num, line_data, [], file_name)
    except IOError as exc:
        LOG.info("Cannot read file: %s", file_name) # pragma: no mutate
        LOG.info(exc) # pragma: no mutate
def process_delta_lines_body(annot, line, delta, num, line_data, data, file_name):
    """ Function to get + and - delta number of lines around a line that has the annotation
    When the line contains the annotation (case insensitive), the surrounding lines are
    appended to data, the joined snippet is stored in DELTA_BODY and an id is stored in UID_LIST.
    @parameters
    annot: Required annotation
    line: the line that is inspected
    delta: Required lines from method
    num: 1 based position of line inside line_data
    line_data: File content in list format
    data: variable that holds delta lines data
    file_name: Path to the file"""
    if annot.upper() not in line.strip().upper():
        return
    span = int(delta)
    if span >= 0:
        # The window is clamped at the start of the file; a negative start index would
        # otherwise wrap around and pull in lines from the end of the file.
        first = max(num - 1 - span, 0)
        data.extend(line_data[first:num + span])
    DELTA_BODY.append("\n".join(data))
    UID_LIST.append(os.path.basename(file_name) + "_")
def get_flat_list(data_list):
    """ Function that generates a list by merging a list of sub lists
    Only one level is flattened; items that are not lists are kept as they are.
    @parameters
    data_list: list of sub lists
    @return
    This function returns a flattened list"""
    flattened_list = []
    for val in data_list:
        if isinstance(val, list):
            flattened_list.extend(val)
        else:
            flattened_list.append(val)
    return flattened_list
def process_delta_lines_data():
    """ This function processes delta lines data to generate a dataframe
    Both module level collectors (UID_LIST and DELTA_BODY) are emptied afterwards, so a
    later run starts clean and the two columns always have the same length.
    @return
    This function returns a dataframe of delta lines data sorted by 'Uniq ID';
    ids that occur more than once get a running number appended"""
    data_frame = pd.DataFrame({'Uniq ID': UID_LIST, 'Code': DELTA_BODY})
    UID_LIST.clear()
    DELTA_BODY.clear()
    mask = data_frame['Uniq ID'].duplicated(keep=False)
    data_frame.loc[mask, 'Uniq ID'] += data_frame.groupby('Uniq ID').cumcount().add(1).astype(str)
    return data_frame.sort_values('Uniq ID')
def process_final_data(code_list):
    """ This function processes function/method data to generate a dataframe
    @parameters
    code_list: list of extracted methods (may contain sub lists)
    @return
    This function returns a dataframe with the columns 'Uniq ID' and 'Code';
    UID_LIST is emptied afterwards"""
    columns = {'Uniq ID': get_flat_list(UID_LIST), 'Code': get_flat_list(code_list)}
    # Going through orient='index' and transposing tolerates columns of unequal length.
    data_frame = pd.DataFrame.from_dict(columns, orient='index').transpose()
    UID_LIST.clear()
    return data_frame
def process_py_files(code_list, line_num, func_name, annot, functionstartwith):
    """ This function processes one python file to extract its methods
    @parameters
    code_list: list to store the extracted methods
    line_num: list of function line numbers
    func_name: path of the python file
    annot: given annotation condition (Ex: @staticmethod)
    functionstartwith: start of the wanted function names; replaces annot when given
    @return
    This function returns code_list, extended by the extracted python methods"""
    if functionstartwith is not None:
        annot = functionstartwith
    if not line_num:
        return code_list
    def_name, def_body = get_py_func_body(line_num, func_name, annot)
    if def_body:
        UID_LIST.append(def_name)
        code_list.append(def_body)
    return code_list
def process_input_files(line_num, functions, annot, func_name, code_list, functionstartwith):
    """ This function processes one (non python) input file to extract its methods
    @parameters
    line_num: list of function line numbers
    functions: list of function names, parallel to line_num
    annot: given annotation condition (Ex: @Test)
    func_name: path of the file
    code_list: list to store the extracted methods
    functionstartwith: only functions whose name starts with this text (case insensitive)
                       are extracted; None extracts all
    @return
    This function returns code_list, extended by the extracted methods"""
    prefix = None if functionstartwith is None else functionstartwith.upper()
    for lin_no, func in zip(line_num, functions):
        if prefix is not None and not str(func).upper().startswith(prefix):
            continue
        # The lookup reads the file, so it is done once and its result reused.
        body = check_annot(func_name, lin_no, annot)
        if body is not None:
            code_list.append(body)
            UID_LIST.append(func_name + "_" + func)
    return code_list
def get_final_dataframe(delta, code_list):
    """ This function turns the extracted data into a dataframe
    @parameters
    delta: Required lines from method; None when whole methods were extracted
    code_list: list of extracted methods
    @return
    This function returns the delta lines dataframe when delta is given, otherwise the
    dataframe of extracted function/methods"""
    if delta is None:
        return process_final_data(code_list)
    return process_delta_lines_data()
def clean_log():
    """ Function to empty the log file (functiondefextractor/extractor.log below the
    parent directory of this module) when it exists"""
    parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
    log_path = os.path.join(parent_dir, "functiondefextractor", "extractor.log")
    if os.path.exists(log_path):
        # Opening for writing truncates the file; nothing is written.
        with open(log_path, 'w'):
            pass
def remove_comments(dataframe):
    """ This function removes comments from the code extracted
    Lines starting with a comment marker are dropped; every other line is stripped and
    cut at its first ";".
    @parameters
    dataframe: extracted methods in dataframe format (code expected in the second column)
    @return
    This function returns the same dataframe with its "Code" column replaced"""
    comment_markers = ("#", "//", "/*", "*", "*/") # pragma: no mutate
    filtered_code = []
    for row in range(len(dataframe)):
        kept = [line.strip().split(";")[0] + os.linesep
                for line in dataframe.iat[row, 1].splitlines()
                if not line.strip().startswith(comment_markers)]
        filtered_code.append("".join(kept))
    dataframe["Code"] = filtered_code
    return dataframe
def get_report(data, path):
    """ This function classifies the report files based on the file type(Ex: .java, .cs, .py, etc.)
    @parameters
    data: extracted methods in dataframe format ('Uniq ID' first, code second column)
    path: Report folder path
    @return
    This function returns the result of write_report_files for the grouped data"""
    method_data = [[] for _ in FILE_TYPE]
    method_name = [[] for _ in FILE_TYPE]
    uid_column = data["Uniq ID"]
    for row in range(len(data)):
        # Rows are addressed by position throughout: the frame may have been sorted, in
        # which case index labels no longer agree with positions.
        uid = uid_column.iloc[row]
        # The file type is the text between the last "." and the following "_".
        extension = uid.split('.')[-1].split("_")[0].upper() # pragma: no mutate
        if extension in FILE_TYPE:
            slot = FILE_TYPE.index(extension)
            method_data[slot].append(data.iat[row, 1])
            method_name[slot].append(data.iat[row, 0])
    return write_report_files(path, method_name, method_data)
def write_report_files(path, method_name, method_data):
    """ This function write the dataframe to excel files
    One file "ExtractedFunc_<type>_<time stamp>.xlsx" is written per file type that has
    at least one extracted method.
    @parameters
    path: Report folder path
    method_name: extracted method names, one list per entry of FILE_TYPE
    method_data: extracted method definitions, one list per entry of FILE_TYPE
    @return
    returns a dataframe with all the extracted method names and definitions"""
    for i, file_type in enumerate(FILE_TYPE):
        dataframe = pd.DataFrame(list(zip(method_name[i], method_data[i])),
                                 columns=['Uniq ID', 'Code'])
        if len(dataframe) == 0:
            continue
        time_stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%H-%M-%S_%d_%m_%Y') # pragma: no mutate
        report_name = "ExtractedFunc_" + str(file_type).strip(".") + "_" + str(time_stamp) # pragma: no mutate
        # The context manager saves and closes the workbook; ExcelWriter.save() is not
        # available any more in current pandas versions.
        with pd.ExcelWriter('%s.xlsx' % os.path.join(path, report_name), # pragma: no mutate
                            engine='xlsxwriter') as writer: # pragma: no mutate
            dataframe.to_excel(writer, sheet_name="funcDefExtractResult")
    return pd.DataFrame(list(zip(method_name, method_data)), columns=['Uniq ID', 'Code'])
def validate_input_paths(path):
    """This function helps in validating the user inputs
    @parameters
    path: path given by the user
    @return
    This function returns True when the path does NOT exist and False when it exists"""
    return not os.path.exists(path)
def initialize_values(delta, annot, path_loc, report_folder):
    """ Function that cleans the log and validates the input variables
    @parameters
    delta: Required lines from method
    annot: given annotation condition (Ex: @staticmethod, @Test)
    path_loc: directory path of the repository
    report_folder: path to report; the repository path is used when it is None
    @return
    This function returns a valid report folder and annotation, or an error
    message (string) when an input is not valid"""
    clean_log()
    if annot is None and delta is not None:
        return "delta(--d) should be in combination with annotation(--a)"
    if validate_input_paths(path_loc):
        return "Enter valid path" # pragma: no mutate
    LOG.info("Input repository path validated successfully") # pragma: no mutate
    target_folder = path_loc if report_folder is None else report_folder
    if validate_input_paths(target_folder):
        return "Enter valid report path" # pragma: no mutate
    LOG.info("Input report folder path validated successfully") # pragma: no mutate
    return target_folder, annot
def extractor(path_loc, annot=None, delta=None, functionstartwith=None, report_folder=None, exclude=None):
    """ Function that initiates the overall process of extracting function/method definitions from the files
    @parameters
    path_loc: directory path of the repository
    annot: given annotation condition (Ex: @staticmethod, @Test)
    delta: Required lines from method
    functionstartwith: only functions whose name starts with this text are extracted
    report_folder: path to report
    exclude: comma separated file patterns that are left out
    @return
    This function returns a data frame which contains the function/method names and body
    of the processed input files, or an error message (string) when the inputs are not valid
    @usage
    extractor(path to repo, "@test")
    the above function call initiates the process to run function definition extraction on
    all files with @test annotation of the repository given """
    start = time.time()
    # Validate once: the call also empties the log file and writes log messages.
    initial_values = initialize_values(delta, annot, path_loc, report_folder)
    if isinstance(initial_values, str):
        return initial_values
    _, annot = initial_values
    code_list = []
    # The repository is walked once; the list is reused for the summary message below.
    source_files = filter_files(filter_reg_files(get_file_names(path_loc), exclude))
    for func_name in source_files:
        LOG.info("Extracting %s", func_name) # pragma: no mutate
        if delta is not None:
            get_delta_lines(func_name, annot, delta)
            continue
        functions, line_num = get_function_names(func_name)
        if os.path.splitext(func_name)[1].upper() == ".PY":
            code_list = process_py_files(code_list, line_num, func_name, annot, functionstartwith)
        else:
            code_list = process_input_files(line_num, functions, annot, func_name, code_list, functionstartwith)
    end = time.time()
    LOG.info("Extraction process took %s minutes", round((end - start) / 60, 3)) # pragma: no mutate
    LOG.info("%s vaild files has been analysed", len(source_files)) # pragma: no mutate
    return remove_comments(get_final_dataframe(delta, code_list))