In [125]:
import pandas as pd
import os

# I. Build Pylint DataFrame

In [127]:
# Folder holding the Pylint CSV reports; each file name is expected to look
# like "<benchmark>_pylint.txt".
pylint_path="/Users/nandini/Desktop/CS230/CS230/stat_analysis/pylintcsv/"

# Map every benchmark name to the column-wise totals of its report.
totals_by_benchmark = {}
for report_name in os.listdir(pylint_path):
    report = pd.read_csv(os.path.join(pylint_path, report_name))
    totals_by_benchmark[report_name.split('_pylint.txt')[0]] = report.sum()

# Transpose so that each benchmark becomes a row, then append the row-wise
# grand total across all report columns.
pylint_df = pd.DataFrame.from_dict(totals_by_benchmark).T
pylint_df['TOTAL_PYLINT'] = pylint_df.sum(axis=1)

In [128]:
# Show the Pylint summary table: one row per benchmark, plus TOTAL_PYLINT.
pylint_df

Unnamed: 0,I,R,C,W,E,F,TOTAL_PYLINT
alt-model-checkpoint-1.0.0,0,1,14,2,6,0,23
bigchaindb_driver-0.5.1,0,35,141,41,63,0,280
birdhousebuilder.recipe.nginx-0.1.4,0,2,64,6,7,0,79
bise.theme-1.20.1,0,29,155,44,86,0,314
bbcode-1.0.8,0,11,33,19,0,0,63
aiida-core-0.12.2,0,1768,9248,3244,789,0,15049
allennlp-0.6.0,0,81,2149,49,1369,0,3648
bok_choy-0.5.0,0,9,37,4,14,0,64
ansigenome-0.5.6,0,18,60,61,3,0,142
autobahn-0.14.1,0,319,2922,686,298,0,4225


# II. Build Bandit DataFrame

In [129]:
# Bandit test IDs, as listed at https://github.com/PyCQA/bandit.
# Each (first, last) pair is an inclusive run of consecutive IDs inside one
# hundreds-series (B1xx, B2xx, ...); the runs are expanded in ascending order.
bandit_id_ranges = [
    (101, 112),
    (201, 201),
    (301, 325),
    (401, 413),
    (501, 507),
    (601, 611),
    (701, 703),
]
test_ids = [
    "B{}".format(number)
    for first, last in bandit_id_ranges
    for number in range(first, last + 1)
]

In [130]:
# Folder holding the Bandit CSV reports; each file name is expected to look
# like "<benchmark>_bandit.txt".
bandit_path="/Users/nandini/Desktop/CS230/CS230/stat_analysis/banditcsv/"
directory = os.fsencode(bandit_path)
dic = dict()

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    benchmark_name = filename.split('_bandit.txt')[0]
    full_path = os.path.join(bandit_path, filename)
    df = pd.read_csv(full_path)
    inner_dic = dict()

    # Filter out the low confidence issues
    df = df[df['issue_confidence'] != 'LOW']
    TOTAL_BANDIT = len(df)
    inner_dic['TOTAL_BANDIT'] = TOTAL_BANDIT

    # Count number of issues in each severity category
    HIGH_SEVERITY = len(df[df['issue_severity'] == 'HIGH'])
    MED_SEVERITY = len(df[df['issue_severity'] == 'MEDIUM'])
    LOW_SEVERITY = len(df[df['issue_severity'] == 'LOW'])
    inner_dic['HIGH_SEVERITY'] = HIGH_SEVERITY
    inner_dic['MED_SEVERITY'] = MED_SEVERITY
    inner_dic['LOW_SEVERITY'] = LOW_SEVERITY

    # Count the number of issues in each test category.  One value_counts()
    # pass tallies every ID at once instead of re-scanning the frame per ID.
    id_counts = df['test_id'].value_counts()
    id_sum = 0
    for test_id in test_ids:
        id_count = int(id_counts.get(test_id, 0))
        inner_dic[test_id] = id_count
        id_sum += id_count

    # Sanity check: every remaining issue should belong to a known test ID.
    if (id_sum != TOTAL_BANDIT):
        # IDs present in the report but missing from test_ids (stringified so
        # that sorting cannot fail on mixed types).
        unknown_ids = sorted(str(i) for i in set(id_counts.index) - set(test_ids))
        # The counts are ints, so they must be converted before concatenation;
        # otherwise the diagnostic itself raises TypeError.
        print("ERROR: Script didn't catch all Bandit tests in " + benchmark_name)
        print("Number of issues caught: " + str(id_sum))
        print("Number of issues expected: " + str(TOTAL_BANDIT))
        print("Unrecognized test IDs: " + ", ".join(unknown_ids))
        print("==================================================================")

    # Append to dictionary of files
    dic[benchmark_name] = inner_dic

# Transpose so that each benchmark becomes a row and each counter a column.
bandit_df = pd.DataFrame.from_dict(dic).T

In [131]:
# Show the Bandit summary table: one row per benchmark, one column per counter.
bandit_df

Unnamed: 0,B101,B102,B103,B104,B105,B106,B107,B108,B109,B110,...,B609,B610,B611,B701,B702,B703,HIGH_SEVERITY,LOW_SEVERITY,MED_SEVERITY,TOTAL_BANDIT
boss-cli-0.3.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,19,1,20
archmage-0.3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,2,4,0,6
ansible-vault-1.0.4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,3,1,4
anncolvar-0.3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
aspen-0.38,5,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,11,3,14
aldryn-django-1.6.11.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,1,0,2
aioli-0.2.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
bok_choy-0.5.0,0,0,0,0,0,0,0,0,0,3,...,0,0,0,0,0,0,0,3,0,3
allennlp-0.6.0,1906,1,0,2,8,1,0,1,0,0,...,0,0,0,0,0,0,15,1952,5,1972
ansigenome-0.5.6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,1,3,5


# III. Combine Pylint and Bandit DataFrames

In [134]:
# Left-join on the benchmark-name index: every Pylint row is kept, and the
# Bandit columns are attached wherever a row with the same index exists.
combined_df = pylint_df.join(bandit_df, how="left")
combined_df

Unnamed: 0,I,R,C,W,E,F,TOTAL_PYLINT,B101,B102,B103,...,B609,B610,B611,B701,B702,B703,HIGH_SEVERITY,LOW_SEVERITY,MED_SEVERITY,TOTAL_BANDIT
alt-model-checkpoint-1.0.0,0,1,14,2,6,0,23,0,0,0,...,0,0,0,0,0,0,0,0,0,0
bigchaindb_driver-0.5.1,0,35,141,41,63,0,280,120,0,0,...,0,0,0,0,0,0,0,120,0,120
birdhousebuilder.recipe.nginx-0.1.4,0,2,64,6,7,0,79,0,0,1,...,0,0,0,0,4,0,0,0,6,6
bise.theme-1.20.1,0,29,155,44,86,0,314,0,0,0,...,0,0,0,0,0,0,0,1,4,5
bbcode-1.0.8,0,11,33,19,0,0,63,0,0,0,...,0,0,0,0,0,0,0,0,0,0
aiida-core-0.12.2,0,1768,9248,3244,789,0,15049,47,0,0,...,0,0,0,0,0,0,2,69,42,113
allennlp-0.6.0,0,81,2149,49,1369,0,3648,1906,1,0,...,0,0,0,0,0,0,15,1952,5,1972
bok_choy-0.5.0,0,9,37,4,14,0,64,0,0,0,...,0,0,0,0,0,0,0,3,0,3
ansigenome-0.5.6,0,18,60,61,3,0,142,0,0,0,...,0,0,0,0,0,0,1,1,3,5
autobahn-0.14.1,0,319,2922,686,298,0,4225,343,1,0,...,0,0,0,0,0,0,0,357,4,361
