### Research questions 1-4

In [19]:
import os
import pandas as pd
import numpy as np
# Allow up to 140 rows in a rendered DataFrame before pandas truncates the display.
pd.set_option("display.max_rows", 140)


def load_csvs(directory):
    """Load every ``.csv`` file found under ``directory`` into one DataFrame.

    The directory tree is walked recursively. Each file is parsed with
    ``header=None``, so columns are labelled by position (0, 1, ...). Files
    that pandas rejects with ``pd.errors.EmptyDataError`` are skipped.

    Directories and files are visited in sorted order so that the row order
    of the result does not depend on the filesystem's listing order.

    Parameters
    ----------
    directory : str or path-like
        Root directory to search.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all parsed CSV files (original per-file
        row indices are kept). If no file could be parsed, an empty frame
        with a single column labelled ``0`` is returned, so callers that
        select column 0 keep working instead of hitting the ``ValueError``
        that ``pd.concat`` raises on an empty list.
    """
    frames = []

    for root, dirs, files in os.walk(directory):
        # Sorting in place steers os.walk's top-down traversal order.
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith('.csv'):
                continue
            file_path = os.path.join(root, file_name)
            try:
                frames.append(pd.read_csv(file_path, header=None))
            except pd.errors.EmptyDataError:
                # Nothing to parse in this file; it contributes no rows.
                continue

    if not frames:
        return pd.DataFrame(columns=[0])
    return pd.concat(frames)

def get_cwes(directory):
    """Count rows per distinct value of CSV column 0 under ``directory``.

    All CSV files below ``directory`` are loaded via ``load_csvs``. Column 0
    is used as the grouping key and exposed as ``cwe``; the number of rows
    per key is exposed as ``count``.

    Returns a DataFrame with columns ``cwe`` and ``count``, ordered by
    ``count`` from highest to lowest.
    """
    findings = load_csvs(directory).rename(columns={0: "cwe"})
    per_cwe = findings.groupby("cwe").size().reset_index(name="count")
    return per_cwe.sort_values(by="count", ascending=False)
    

In [44]:
# Counts over the whole analysis tree; the recursive walk includes every rq_* subdirectory.
get_cwes('../vulnerability_analysis')

Unnamed: 0,cwe,count
8,Flask app is run in debug mode,36
13,Information exposure through an exception,25
10,Hard-coded credentials,19
23,URL redirection from remote source,17
18,Reflected server-side cross-site scripting,13
31,Useless regular-expression character escape,12
22,Time-of-check time-of-use filesystem race cond...,11
16,Missing rate limiting,11
20,Server-side URL redirect,10
21,Server-side request forgery,9


### RQ1

In [28]:
# RQ1: counts over everything under rq_1 (all tool subdirectories combined).
codeql_results = get_cwes('../vulnerability_analysis/rq_1')
codeql_results

Unnamed: 0,cwe,count
2,Flask app is run in debug mode,36
4,Information exposure through an exception,25
5,Reflected server-side cross-site scripting,13
6,URL redirection from remote source,7
11,Use of insecure SSL/TLS version,6
0,Code injection,5
3,Full server-side request forgery,5
9,Use of a broken or weak cryptographic algorithm,5
10,Use of a broken or weak cryptographic hashing ...,5
7,Uncontrolled command line,3


In [24]:
# RQ1, restricted to the copilot subdirectory.
get_cwes('../vulnerability_analysis/rq_1/copilot')

Unnamed: 0,cwe,count
1,Flask app is run in debug mode,9
4,Reflected server-side cross-site scripting,3
2,Full server-side request forgery,2
5,URL redirection from remote source,2
0,Deserialization of user-controlled data,1
3,Information exposure through an exception,1
6,Use of a broken or weak cryptographic algorithm,1
7,Use of insecure SSL/TLS version,1
8,XML internal entity expansion,1


In [26]:
# RQ1, restricted to the tabnine subdirectory.
get_cwes('../vulnerability_analysis/rq_1/tabnine')

Unnamed: 0,cwe,count
3,Information exposure through an exception,7
1,Flask app is run in debug mode,6
0,Code injection,2
4,Reflected server-side cross-site scripting,2
10,Use of insecure SSL/TLS version,2
2,Full server-side request forgery,1
5,URL redirection from remote source,1
6,Uncontrolled command line,1
7,Uncontrolled data used in path expression,1
8,Use of a broken or weak cryptographic algorithm,1


In [25]:
# RQ1, restricted to the chatgpt subdirectory.
get_cwes('../vulnerability_analysis/rq_1/chatgpt')

Unnamed: 0,cwe,count
1,Flask app is run in debug mode,15
3,Information exposure through an exception,13
0,Code injection,2
4,Reflected server-side cross-site scripting,2
5,URL redirection from remote source,2
8,Use of a broken or weak cryptographic hashing ...,2
2,Full server-side request forgery,1
6,Uncontrolled command line,1
7,Use of a broken or weak cryptographic algorithm,1
9,Use of insecure SSL/TLS version,1


In [27]:
# RQ1, restricted to the codegeex subdirectory.
get_cwes('../vulnerability_analysis/rq_1/codegeex')

Unnamed: 0,cwe,count
1,Flask app is run in debug mode,6
4,Reflected server-side cross-site scripting,6
3,Information exposure through an exception,4
5,URL redirection from remote source,2
7,Uncontrolled data used in path expression,2
8,Use of a broken or weak cryptographic algorithm,2
9,Use of a broken or weak cryptographic hashing ...,2
10,Use of insecure SSL/TLS version,2
0,Code injection,1
2,Full server-side request forgery,1


### RQ2

In [33]:
# RQ2: counts over everything under rq_2 (all tool subdirectories combined).
get_cwes('../vulnerability_analysis/rq_2')

Unnamed: 0,cwe,count
1,Time-of-check time-of-use filesystem race cond...,11
3,Uncontrolled data used in OS command,7
0,CGI script vulnerable to cross-site scripting,5
2,Uncontrolled data in arithmetic expression,3
4,Wrong type of arguments to formatting function,2


In [29]:
# RQ2, restricted to the copilot subdirectory.
# NOTE(review): the saved output of this cell is identical to the rq_1/copilot
# output and lists labels that do not appear in the rq_2 aggregate, so it looks
# stale or the directory holds the wrong data. Re-run and verify before citing.
get_cwes('../vulnerability_analysis/rq_2/copilot')

Unnamed: 0,cwe,count
1,Flask app is run in debug mode,9
4,Reflected server-side cross-site scripting,3
2,Full server-side request forgery,2
5,URL redirection from remote source,2
0,Deserialization of user-controlled data,1
3,Information exposure through an exception,1
6,Use of a broken or weak cryptographic algorithm,1
7,Use of insecure SSL/TLS version,1
8,XML internal entity expansion,1


In [30]:
# RQ2, restricted to the tabnine subdirectory.
get_cwes('../vulnerability_analysis/rq_2/tabnine')

Unnamed: 0,cwe,count
0,Time-of-check time-of-use filesystem race cond...,5
2,Uncontrolled data used in OS command,2
3,Wrong type of arguments to formatting function,2
1,Uncontrolled data in arithmetic expression,1


In [31]:
# RQ2, restricted to the chatgpt subdirectory.
get_cwes('../vulnerability_analysis/rq_2/chatgpt')

Unnamed: 0,cwe,count
1,Time-of-check time-of-use filesystem race cond...,3
0,CGI script vulnerable to cross-site scripting,2
3,Uncontrolled data used in OS command,2
2,Uncontrolled data in arithmetic expression,1


In [32]:
# RQ2, restricted to the codegeex subdirectory.
get_cwes('../vulnerability_analysis/rq_2/codegeex')

Unnamed: 0,cwe,count
0,Time-of-check time-of-use filesystem race cond...,3


### RQ3

In [34]:
# RQ3: counts over everything under rq_3 (all tool subdirectories combined).
get_cwes('../vulnerability_analysis/rq_3')

Unnamed: 0,cwe,count
2,URL redirection from remote source,10
4,Uncontrolled data used in path expression,3
5,User-controlled bypass of sensitive method,3
0,Arbitrary file access during archive extractio...,2
1,Denial of Service from comparison of user inpu...,1
3,Uncontrolled command line,1


In [35]:
# RQ3, restricted to the copilot subdirectory.
get_cwes('../vulnerability_analysis/rq_3/copilot')

Unnamed: 0,cwe,count
0,URL redirection from remote source,3
1,Uncontrolled command line,1
2,Uncontrolled data used in path expression,1


In [36]:
# RQ3, restricted to the tabnine subdirectory.
get_cwes('../vulnerability_analysis/rq_3/tabnine')

Unnamed: 0,cwe,count
2,URL redirection from remote source,3
0,Arbitrary file access during archive extractio...,1
1,Denial of Service from comparison of user inpu...,1
3,User-controlled bypass of sensitive method,1


In [37]:
# RQ3, restricted to the chatgpt subdirectory.
get_cwes('../vulnerability_analysis/rq_3/chatgpt')

Unnamed: 0,cwe,count
1,URL redirection from remote source,3
3,User-controlled bypass of sensitive method,2
0,Arbitrary file access during archive extractio...,1
2,Uncontrolled data used in path expression,1


In [38]:
# RQ3, restricted to the codegeex subdirectory.
get_cwes('../vulnerability_analysis/rq_3/codegeex')

Unnamed: 0,cwe,count
0,URL redirection from remote source,1
1,Uncontrolled data used in path expression,1


### RQ4

In [39]:
# RQ4: counts over everything under rq_4 (all tool subdirectories combined).
get_cwes('../vulnerability_analysis/rq_4')

Unnamed: 0,cwe,count
4,Hard-coded credentials,19
16,Useless regular-expression character escape,12
9,Missing rate limiting,11
12,Server-side URL redirect,10
13,Server-side request forgery,9
10,Reflected cross-site scripting,8
1,Clear text transmission of sensitive cookie,5
11,Sensitive data read from GET request,4
8,Missing CSRF middleware,2
6,Information exposure through a stack trace,2


In [40]:
# RQ4, restricted to the copilot subdirectory.
get_cwes('../vulnerability_analysis/rq_4/copilot')

Unnamed: 0,cwe,count
0,Hard-coded credentials,5
2,Missing rate limiting,3
6,Server-side request forgery,3
5,Server-side URL redirect,2
1,Information exposure through a stack trace,1
3,Reflected cross-site scripting,1
4,Sensitive data read from GET request,1


In [41]:
# RQ4, restricted to the tabnine subdirectory.
get_cwes('../vulnerability_analysis/rq_4/tabnine')

Unnamed: 0,cwe,count
8,Useless regular-expression character escape,12
4,Reflected cross-site scripting,5
2,Hard-coded credentials,4
6,Server-side URL redirect,3
3,Missing rate limiting,2
0,Clear text transmission of sensitive cookie,1
1,Exception text reinterpreted as HTML,1
5,Sensitive data read from GET request,1
7,Server-side request forgery,1


In [42]:
# RQ4, restricted to the chatgpt subdirectory.
get_cwes('../vulnerability_analysis/rq_4/chatgpt')

Unnamed: 0,cwe,count
2,Hard-coded credentials,7
0,Clear text transmission of sensitive cookie,3
4,Missing rate limiting,3
7,Server-side URL redirect,3
8,Server-side request forgery,3
3,Missing CSRF middleware,2
1,Code injection,1
5,Reflected cross-site scripting,1
6,Sensitive data read from GET request,1
9,Uncontrolled command line,1


In [43]:
# RQ4, restricted to the codegeex subdirectory.
get_cwes('../vulnerability_analysis/rq_4/codegeex')

Unnamed: 0,cwe,count
3,Hard-coded credentials,3
7,Missing rate limiting,3
10,Server-side URL redirect,2
11,Server-side request forgery,2
0,CORS misconfiguration for credentials transfer,1
1,Clear text transmission of sensitive cookie,1
2,Exception text reinterpreted as HTML,1
4,Inefficient regular expression,1
5,Information exposure through a stack trace,1
6,Insecure randomness,1
