In [131]:
import sys
sys.path.append('../main')
import sql
import pandas as pd
from functools import reduce
def get(q):
    return pd.DataFrame(sql.execute(q))

In [132]:
q='''select count(*) as c from alert
where language in ('C','C++')
and is_invalid=0;'''
total = sql.execute(q)[0]['c']
total


60134

In [133]:
q='''select c.id, c.name, count(*) as prevalence from alert a
        join memory_error me on a.alert_type_id = me.alert_type_id
        join cwe c on a.CWE=c.id
        where language in ('C','C++')
        and is_invalid=0
        and memory=1
        and a.CWE is not null
        group by c.id, c.name
        order by prevalence desc;'''
df=get(q)
df['rate (%)']=round((df.prevalence/total)*100,2)
df = df.sort_values('rate (%)',ascending=False)
df=df.rename(columns={'id':'CWE'})

In [134]:
q='''select CWE, count(*) as fixed from alert a
join memory_error me on a.alert_type_id = me.alert_type_id
where language in ('C','C++')
and is_invalid=0
and memory=1
  and status='Fixed'
and CWE is not null
group by CWE;
'''
temp= get(q)
df=reduce(lambda x,y:pd.merge(x,y,on='CWE',how='left'),[df,temp])
df.fillna(0, inplace=True)
df.fixed=round(df.fixed/df.prevalence*100,2)
df

Unnamed: 0,CWE,name,prevalence,rate (%),fixed
0,476,NULL Pointer Dereference,8689,14.45,81.86
1,404,Improper Resource Shutdown or Release,4250,7.07,66.92
2,119,Improper Restriction of Operations within the ...,2125,3.53,67.06
3,457,Use of Uninitialized Variable,1862,3.1,74.01
4,190,Integer Overflow or Wraparound,1003,1.67,54.24
5,125,Out-of-bounds Read,939,1.56,67.41
6,416,Use After Free,877,1.46,73.43
7,120,Buffer Copy without Checking Size of Input ('C...,751,1.25,78.7
8,170,Improper Null Termination,378,0.63,68.78
9,590,Free of Memory not on the Heap,363,0.6,50.96


In [135]:
q='''select CWE, count(*) as triaged_bug from alert a
join memory_error me on a.alert_type_id = me.alert_type_id
where language in ('C','C++')
and is_invalid=0
and memory=1
  and classification = 'Bug'
and CWE is not null
group by CWE;'''
temp= get(q)
df=reduce(lambda x,y:pd.merge(x,y,on='CWE',how='left'),[df,temp])
df.fillna(0, inplace=True)
df['triaged_bug']=round(df['triaged_bug']/df.prevalence*100,2)
df

Unnamed: 0,CWE,name,prevalence,rate (%),fixed,triaged_bug
0,476,NULL Pointer Dereference,8689,14.45,81.86,21.99
1,404,Improper Resource Shutdown or Release,4250,7.07,66.92,14.33
2,119,Improper Restriction of Operations within the ...,2125,3.53,67.06,4.42
3,457,Use of Uninitialized Variable,1862,3.1,74.01,15.52
4,190,Integer Overflow or Wraparound,1003,1.67,54.24,7.88
5,125,Out-of-bounds Read,939,1.56,67.41,8.2
6,416,Use After Free,877,1.46,73.43,7.41
7,120,Buffer Copy without Checking Size of Input ('C...,751,1.25,78.7,2.13
8,170,Improper Null Termination,378,0.63,68.78,7.94
9,590,Free of Memory not on the Heap,363,0.6,50.96,0.0


In [136]:
q='''select CWE, count(*) as triaged_fp from alert a
join memory_error me on a.alert_type_id = me.alert_type_id
where language in ('C','C++')
and is_invalid=0
and memory=1
  and (classification = 'False Positive' or classification='Intentional')
and CWE is not null
group by CWE;'''
temp= get(q)
df=reduce(lambda x,y:pd.merge(x,y,on='CWE',how='left'),[df,temp])
df.fillna(0, inplace=True)
df['triaged_fp']=round(df['triaged_fp']/df.prevalence*100,2)
df

Unnamed: 0,CWE,name,prevalence,rate (%),fixed,triaged_bug,triaged_fp
0,476,NULL Pointer Dereference,8689,14.45,81.86,21.99,4.12
1,404,Improper Resource Shutdown or Release,4250,7.07,66.92,14.33,18.99
2,119,Improper Restriction of Operations within the ...,2125,3.53,67.06,4.42,12.89
3,457,Use of Uninitialized Variable,1862,3.1,74.01,15.52,6.12
4,190,Integer Overflow or Wraparound,1003,1.67,54.24,7.88,7.48
5,125,Out-of-bounds Read,939,1.56,67.41,8.2,11.82
6,416,Use After Free,877,1.46,73.43,7.41,19.27
7,120,Buffer Copy without Checking Size of Input ('C...,751,1.25,78.7,2.13,3.6
8,170,Improper Null Termination,378,0.63,68.78,7.94,9.26
9,590,Free of Memory not on the Heap,363,0.6,50.96,0.0,6.89


In [137]:
q='''select CWE, count(*) as actionable from alert a
join memory_error me on a.alert_type_id = me.alert_type_id
join actionability a2 on a.id = a2.alert_id
where language in ('C','C++')
and is_invalid=0
and memory=1
and CWE is not null
and actionability=1
group by CWE;'''
temp= get(q)
df=reduce(lambda x,y:pd.merge(x,y,on='CWE',how='left'),[df,temp])
df.fillna(0, inplace=True)
df.actionable=round(df.actionable/df.prevalence*100,2)
df

Unnamed: 0,CWE,name,prevalence,rate (%),fixed,triaged_bug,triaged_fp,actionable
0,476,NULL Pointer Dereference,8689,14.45,81.86,21.99,4.12,58.42
1,404,Improper Resource Shutdown or Release,4250,7.07,66.92,14.33,18.99,43.41
2,119,Improper Restriction of Operations within the ...,2125,3.53,67.06,4.42,12.89,33.22
3,457,Use of Uninitialized Variable,1862,3.1,74.01,15.52,6.12,53.49
4,190,Integer Overflow or Wraparound,1003,1.67,54.24,7.88,7.48,35.39
5,125,Out-of-bounds Read,939,1.56,67.41,8.2,11.82,42.81
6,416,Use After Free,877,1.46,73.43,7.41,19.27,36.26
7,120,Buffer Copy without Checking Size of Input ('C...,751,1.25,78.7,2.13,3.6,15.98
8,170,Improper Null Termination,378,0.63,68.78,7.94,9.26,56.88
9,590,Free of Memory not on the Heap,363,0.6,50.96,0.0,6.89,30.03


In [138]:
q='''select CWE, lifespan from alert a
join memory_error me on a.alert_type_id = me.alert_type_id
join actionability a2 on a.id = a2.alert_id
join
(select a.id, datediff(s.date,first_detected) as lifespan from alert a
join snapshot s
on a.last_snapshot_id=s.id) t1
on a.id=t1.id
where language in ('C','C++')
and is_invalid=0
and memory=1
and CWE is not null
and actionability=1;'''
l=get(q)
l=l.groupby('CWE')[['lifespan']].median()
l

Unnamed: 0_level_0,lifespan
CWE,Unnamed: 1_level_1
119,278.0
120,515.0
125,244.0
129,244.0
131,35.0
170,379.5
188,454.0
190,315.0
197,1260.0
252,385.0


In [139]:
df=reduce(lambda x,y:pd.merge(x,y,on='CWE',how='left'),[df,l])
df.fillna(0, inplace=True)
df.prevalence=df.prevalence.apply(lambda x:'{:,}'.format(x))
df.prevalence=df.prevalence+' ('+df['rate (%)'].astype(str)+'%)'
df=df.drop(columns='rate (%)')
df.fixed= df.fixed.astype(str) + '%'
df.triaged_big=df.triaged_bug.astype(str) + '%'
df.triaged_fp=df.triaged_fp.astype(str) + '%'
df.actionable=df.actionable.astype(str) + '%'
df = df.rename(columns={'CWE':'CEW-Id','name':'CWE-name','prevalence':'No. of alerts','fixed':'Eliminated', 'triaged_bug':'Triaged Bug by Devs.',
'triaged_fp':'Triaged False Positive',
'actionable':'Actionable', 'lifespan':'lifespan (days)'})
df=df[['CEW-Id','CWE-name','No. of alerts','Eliminated','Actionable',
'lifespan (days)','Triaged Bug by Devs.','Triaged False Positive']]

In [143]:
from tabulate import tabulate
df=df.head(10)
print(tabulate(df,tablefmt='pipe',showindex=False, headers='keys'))

|   CEW-Id | CWE-name                                                                | No. of alerts   | Eliminated   | Actionable   |   lifespan (days) |   Triaged Bug by Devs. | Triaged False Positive   |
|---------:|:------------------------------------------------------------------------|:----------------|:-------------|:-------------|------------------:|-----------------------:|:-------------------------|
|      476 | NULL Pointer Dereference                                                | 8,689 (14.45%)  | 81.86%       | 58.42%       |              44   |                  21.99 | 4.12%                    |
|      404 | Improper Resource Shutdown or Release                                   | 4,250 (7.07%)   | 66.92%       | 43.41%       |             122   |                  14.33 | 18.99%                   |
|      119 | Improper Restriction of Operations within the Bounds of a Memory Buffer | 2,125 (3.53%)   | 67.06%       | 33.22%       |             278   |                  

In [141]:
df.to_csv ('cwe_insights_on_coverity_data.csv', index = False, header=True)