In [1]:
from os import path
import pandas as pd
from glob import glob
from util.table_converter import TableConverter, TableProps;
from generator.constant import Constant
from statistics import mean, median, variance, stdev

In [2]:
# Project names are taken from the JSON file names in ../compile_information:
# the basename up to its first '.'.  os.path.basename is used instead of
# splitting on '/' so the result does not depend on the separator glob returns.
# 'template' and 'compile_information' are filtered out (presumably helper
# files rather than real projects -- TODO confirm); filtering instead of
# list.remove() means a missing helper file no longer raises ValueError.
excluded_names = {'template', 'compile_information'}
target_project_names = [
    project_name
    for project_name in (
        path.basename(json_path).split('.')[0]
        for json_path in glob('../compile_information/*.json')
    )
    if project_name not in excluded_names
]

# Local checkout directory of every target project.
target_project_root_paths = [Constant.local_project_root_path + '/' + tpn for tpn in target_project_names]

# Load every CSV table from the output directory; each path is kept in its own
# variable next to the DataFrame read from it.
project_root_path = Constant.output_root_path + '/' + 'project.csv'
project_df = pd.read_csv(project_root_path)

file_root_path = Constant.output_root_path + '/' + 'file.csv'
file_df = pd.read_csv(file_root_path)

available_macro_root_path = Constant.output_root_path + '/' + 'available_macros.csv'
available_macro_df = pd.read_csv(available_macro_root_path)

used_macro_root_path = Constant.output_root_path + '/' + 'used_macros.csv'
used_macro_df = pd.read_csv(used_macro_root_path)

defined_macro_root_path = Constant.output_root_path + '/' + 'defined_macros.csv'
defined_macro_df = pd.read_csv(defined_macro_root_path)

whole_macro_root_path = Constant.output_root_path + '/' + 'whole_macros.csv'
whole_macro_df = pd.read_csv(whole_macro_root_path)

macros_root_path = Constant.output_root_path + '/' + 'macros.csv'
macros_df = pd.read_csv(macros_root_path)

# Shared converter used by the cells below to save each summary as a table.
tvc = TableConverter()


In [3]:
def show_number_in_each_file(macro_df):
    """Summarise the per-file macro counts of every target project.

    Reads the notebook-level ``file_df``, ``project_df`` and
    ``target_project_names``, so those must be loaded before calling.

    Args:
        macro_df: DataFrame with at least ``file_id`` and ``macro_id``
            columns.  Rows are grouped by ``file_id`` and the non-null
            ``macro_id`` entries of each group are counted.  After the left
            merge with ``file_df`` on ``file_id`` a ``project_id`` column
            must be present.

    Returns:
        DataFrame with one row per entry of ``project_df`` whose ``name`` is
        in ``target_project_names`` and the columns ``project name``,
        ``file num`` (number of files that have at least one row in
        ``macro_df``), ``variaty of macro num`` (number of distinct
        ``macro_id`` values in the project), ``mean``, ``median``,
        ``variance`` and ``stdev`` of the per-file counts.  ``mean`` and
        ``median`` are NaN for a project without any rows; ``variance`` and
        ``stdev`` are NaN when fewer than two files are available, because
        the sample variance is undefined there.
    """
    columns = ['project name', 'file num', 'variaty of macro num', 'mean', 'median', 'variance', 'stdev']
    nan = float('nan')

    # Rows per file, then attach the file table to learn each file's project.
    macro_per_files_count = macro_df.groupby(['file_id']).count()
    file_macro_count_df = pd.merge(macro_per_files_count, file_df, on='file_id', how='left')
    file_macro_df = pd.merge(macro_df, file_df, on='file_id', how='left')

    rows = []
    for project_id, project_name in zip(project_df['project_id'], project_df['name']):
        if project_name not in target_project_names:
            continue

        # Per-file counts of this project as plain Python ints.
        counts = file_macro_count_df.loc[
            file_macro_count_df['project_id'] == project_id, 'macro_id'
        ].tolist()
        project_macro_ids = file_macro_df.loc[file_macro_df['project_id'] == project_id, 'macro_id']

        file_num = len(counts)
        mean_value = median_value = variance_value = stdev_value = nan
        if file_num:
            # Round to as many decimals as the shorter of "digits of the largest
            # per-file count" and "digits of the number of files".
            valid_digits = min(len(str(max(counts))), len(str(file_num)))
            mean_value = round(mean(counts), valid_digits)
            median_value = round(median(counts), valid_digits)
            if file_num > 1:
                # statistics.variance / stdev need at least two data points.
                variance_value = round(variance(counts), valid_digits)
                stdev_value = round(stdev(counts), valid_digits)

        rows.append([
            project_name,
            file_num,
            len(pd.unique(project_macro_ids)),
            mean_value,
            median_value,
            variance_value,
            stdev_value,
        ])
    return pd.DataFrame(rows, columns=columns)

## Information of Available Macros

In [4]:
# Per-file summary of the "available macros" table.
available_info_df = show_number_in_each_file(available_macro_df)

# Pass the summary to the table converter, then show it in the cell output.
tvc.save_as_table(
    TableProps(
        title='available macro per file info',
        file_name='available_macro_info',
        data=available_info_df,
    )
)
print(available_info_df)

  project name  file num  variaty of macro num       mean  median  \
0      radare2      1727                 30105  3492.5298  4006.0   
1        nginx       339                 26256  5545.0410  5708.0   
2     goaccess        39                  4181  1965.5600  2004.0   
3          nnn         9                  9324  1802.7000  1310.0   
4          ish       167                  9064  2077.8320  1638.0   
5       brotli        88                  2336   936.8200  1018.0   

       variance      stdev  
0  3.884547e+06  1970.9254  
1  2.369480e+06  1539.3120  
2  6.885119e+05   829.7700  
3  2.323378e+06  1524.3000  
4  1.498494e+06  1224.1300  
5  1.209509e+05   347.7800  


## Information of Whole Macros

In [5]:
# Per-file summary of the "whole macros" table.
whole_info_df = show_number_in_each_file(whole_macro_df)

# Pass the summary to the table converter, then show it in the cell output.
tvc.save_as_table(
    TableProps(
        title='whole macro per file info',
        file_name='whole_macro_info',
        data=whole_info_df,
    )
)
print(whole_info_df)

  project name  file num  variaty of macro num       mean  median  \
0      radare2      1727                 30411  3519.2693  4040.0   
1        nginx       339                 26331  5590.4400  5758.0   
2     goaccess        39                  4207  1978.6900  2028.0   
3          nnn         9                  9348  1805.9000  1312.0   
4          ish       167                  9152  2083.9940  1640.0   
5       brotli        88                  2432   946.7200  1022.0   

       variance      stdev  
0  3.945235e+06  1986.2616  
1  2.400397e+06  1549.3220  
2  7.014283e+05   837.5100  
3  2.334693e+06  1528.0000  
4  1.509682e+06  1228.6910  
5  1.255317e+05   354.3000  


## Information of Defined Macros

In [6]:
# Per-file summary of the "defined macros" table.
defined_macro_info_df = show_number_in_each_file(defined_macro_df)

# Pass the summary to the table converter, then show it in the cell output.
tvc.save_as_table(
    TableProps(
        title='defined macro per file info',
        file_name='defined_macro_info',
        data=defined_macro_info_df,
    )
)
print(defined_macro_info_df)



  project name  file num  variaty of macro num     mean  median     variance  \
0      radare2       957                 20020   27.421     4.0    17174.409   
1        nginx       178                  1489   10.618     3.0      338.283   
2     goaccess        19                   389   21.790     4.0     3023.510   
3          nnn         9                  4193  467.200    10.0  1505941.700   
4          ish       116                  1543   15.302     3.0     1444.717   
5       brotli        69                   330    7.350     2.0      259.410   

      stdev  
0   131.051  
1    18.392  
2    54.990  
3  1227.200  
4    38.009  
5    16.110  


## Information of Used Macros

In [7]:
# Per-file summary of the "used macros" table.
used_macro_info_df = show_number_in_each_file(used_macro_df)

# Pass the summary to the table converter, then show it in the cell output.
tvc.save_as_table(
    TableProps(
        title='used macro per file info',
        file_name='used_macro_info',
        data=used_macro_info_df,
    )
)
print(used_macro_info_df)

  project name  file num  variaty of macro num    mean  median   variance  \
0      radare2      1339                  6183  12.344     7.0    624.342   
1        nginx       271                  1006  20.882    16.0    423.653   
2     goaccess        30                   304  15.500     7.0    314.400   
3          nnn         3                   270  92.700    11.0  21772.300   
4          ish       121                   828  14.230     7.0    354.830   
5       brotli        62                   129   5.080     2.0     48.500   

     stdev  
0   24.987  
1   20.583  
2   17.730  
3  147.600  
4   18.840  
5    6.960  
