In [34]:
import pandas as pd
import os

def read_csvs(path):
    """Load every ``.csv`` file directly inside ``path`` into one DataFrame.

    Parameters
    ----------
    path : str or os.PathLike
        Directory to scan (not recursive). A trailing separator is optional.

    Returns
    -------
    pandas.DataFrame
        Rows of all CSV files concatenated in file-name order, with a fresh
        0..n-1 index (``ignore_index=True``).

    Raises
    ------
    ValueError
        If the directory contains no ``.csv`` files.
    """
    # Sort the listing: os.listdir returns entries in arbitrary order, which
    # would make the row order of the result differ between runs/machines.
    csv_names = sorted(name for name in os.listdir(path) if name.endswith(".csv"))
    if not csv_names:
        # pd.concat([]) would raise a ValueError anyway; say what went wrong.
        raise ValueError(f"No .csv files found in {path!r}")
    # os.path.join instead of string concatenation so that ``path`` works
    # with or without a trailing separator.
    frames = [pd.read_csv(os.path.join(path, name)) for name in csv_names]
    return pd.concat(frames, ignore_index=True)


In [35]:
# Combine the CSV files found in the fixes output directory into one frame.
# NOTE(review): absolute, machine-specific path - the notebook will not run
# on another machine as-is.
df = read_csvs(path="/home/r4ph/desenv/phd/exception-miner/output/fixes/")

In [36]:
# Number of distinct values in the 'file' column (a NaN, if present, is
# counted as one value).
df["file"].nunique(dropna=False)

1429

In [37]:
# List the column labels of the combined frame.
df.columns

Index(['file', 'function', 'func_body', 'project', 'commit_fix', 'repo_url',
       'url_issue', 'str_uncaught_exceptions', 'n_try_except', 'n_try_pass',
       'n_finally', 'n_generic_except', 'n_raise', 'n_captures_broad_raise',
       'n_captures_try_except_raise', 'n_captures_misplaced_bare_raise',
       'n_try_else', 'n_try_return', 'str_except_identifiers',
       'str_raise_identifiers', 'str_except_block', 'n_nested_try',
       'n_bare_except', 'n_bare_raise_finally'],
      dtype='object')

In [38]:
# Dataset-wide totals: distinct files and functions, plus the sum of each
# smell counter. Mixing a plain function with ['sum'] lists yields one row per
# aggregation name, with NaN where that aggregation was not requested.
df_total = df.agg({
    **dict.fromkeys(['file', 'function'], pd.Series.nunique),
    **{
        smell: ['sum']
        for smell in [
            'n_try_pass',
            'n_generic_except',
            'n_nested_try',
            'n_bare_except',
            'n_captures_misplaced_bare_raise',
            'n_captures_broad_raise',
            'n_bare_raise_finally',
        ]
    },
})

In [39]:
df_total

Unnamed: 0,file,function,n_try_pass,n_generic_except,n_nested_try,n_bare_except,n_captures_misplaced_bare_raise,n_captures_broad_raise,n_bare_raise_finally
nunique,1429.0,29647.0,,,,,,,
sum,,,1301.0,1608.0,108.0,481.0,11.0,622.0,2.0


In [62]:
# One row per (project, commit_fix, url_issue, repo_url): number of distinct
# files and functions, plus the summed value of each smell counter.
# NOTE(review): as_index=False already keeps the group keys as columns, so the
# trailing reset_index() only contributes an extra 'index' column - confirm
# whether that column is wanted in the exported table.
df_rq5 = (
    df.groupby(['project', 'commit_fix', 'url_issue', 'repo_url'], as_index=False)
    .agg({
        **dict.fromkeys(['file', 'function'], pd.Series.nunique),
        **{
            smell: ['sum']
            for smell in [
                'n_try_pass',
                'n_generic_except',
                'n_nested_try',
                'n_bare_except',
                'n_captures_misplaced_bare_raise',
                'n_captures_broad_raise',
                'n_bare_raise_finally',
            ]
        },
    })
    .reset_index()
)

In [63]:
# Flatten the two-level column labels produced by the aggregation by removing
# the second level (the aggregation name).
# NOTE: not idempotent - running this cell twice fails once only one level is left.
df_rq5 = df_rq5.droplevel(level=1, axis="columns")

In [64]:
# Columns holding the per-smell counters that are summed into 'has_smell'.
smells_cols = [
    'n_try_pass',
    'n_generic_except',
    'n_nested_try',
    'n_bare_except',
    'n_captures_misplaced_bare_raise',
    'n_captures_broad_raise',
    'n_bare_raise_finally',
]

In [65]:
# Row-wise total of all smell counters. Despite the name, 'has_smell' stores a
# count rather than a boolean; any value above zero means a smell was found.
df_rq5['has_smell'] = df_rq5.loc[:, smells_cols].sum(axis='columns')

In [68]:
# Show only the rows whose smell total is positive.
df_rq5.loc[df_rq5['has_smell'].gt(0)]

Unnamed: 0,index,project,commit_fix,url_issue,repo_url,file,function,n_try_pass,n_generic_except,n_nested_try,n_bare_except,n_captures_misplaced_bare_raise,n_captures_broad_raise,n_bare_raise_finally,has_smell
0,0,aiohttp,0d2e43bf2a920975a5da4d9295e0ba887080bf5b,https://github.com/aio-libs/aiohttp/issues/7237,https://github.com/aio-libs/aiohttp,2,143,1,0,0,0,0,0,0,1.0
2,2,ansible,019d078a5a457823e8d445d4e949b5ed041e2609,https://github.com/ansible/ansible/issues/55986,https://github.com/ansible/ansible,9,46,2,6,0,0,0,1,0,9.0
3,3,ansible,05879d331ae23b9c4a77b70425c3f8e48ab2e8ea,https://github.com/ansible/ansible/issues/20356,https://github.com/ansible/ansible,1,11,2,0,0,0,0,0,0,2.0
5,5,ansible,0b64408f5a2d2135afcb68988c51a3dd50e0124f,https://github.com/ansible/ansible/issues/5531,https://github.com/ansible/ansible,2,9,0,0,0,3,0,0,0,3.0
6,6,ansible,0c3216c5652411549ce32234a18c6ba2329d35ec,https://github.com/ansible/ansible/issues/44740,https://github.com/ansible/ansible,2,10,2,4,0,0,0,1,0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,480,vision,cdbbd6664bbd2e212739519aa0eb70c06252e88c,https://github.com/pytorch/vision/issues/7838,https://github.com/pytorch/vision,2,209,0,1,0,0,0,0,0,1.0
481,481,wagtail,8d1835f55c1700ec003fb11d299a7769a800a54c,https://github.com/wagtail/wagtail/issues/8699,https://github.com/wagtail/wagtail,4,107,1,2,1,0,0,2,0,6.0
484,484,xx-net,a183aca3d13ce6dcf7795f4efbdc9502b5016cf2,https://github.com/xx-net/xx-net/issues/10649,https://github.com/xx-net/xx-net,2,16,0,4,0,1,0,0,0,5.0
485,485,youtube-dl,b3f0e5304807862ce72c136da90b860df805ee5c,https://github.com/ytdl-org/youtube-dl/issues/...,https://github.com/ytdl-org/youtube-dl,1,25,0,1,0,0,0,1,0,2.0


In [67]:
# Write the aggregated table to 'eh_fixes.csv' in the current working directory.
# NOTE(review): with pandas' default index=True the row index is written as an
# unnamed first column - confirm that this is intended.
df_rq5.to_csv(path_or_buf="eh_fixes.csv")