In [5]:
import os
import pandas as pd

def count_lines_in_csv(file_path):
    """Return the number of data rows pandas parses from a CSV file.

    The file is loaded with ``pd.read_csv`` using its defaults, so the first
    line is taken as the header and is not part of the count.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The row count of the parsed frame, ``0`` when pandas reports the file
        as empty (``EmptyDataError``), or ``None`` when reading fails for any
        other reason (the error is printed, not raised).
    """
    try:
        row_total = pd.read_csv(file_path).shape[0]
    except pd.errors.EmptyDataError:
        # Nothing to parse at all: treat as zero rows rather than a failure.
        row_total = 0
    except Exception as err:
        # Best-effort: report the problem and let the caller keep going.
        print(f"Error reading {file_path}: {err}")
        row_total = None
    return row_total

def process_csv_files(directory):
    """Summarise every ``.csv`` file found under ``directory`` in a DataFrame.

    The first four path components *below* ``directory`` are used as the
    RQ, Tool, CWE and Scenario labels of each file, e.g.
    ``<directory>/rq_1/chatgpt/cwe_20/scenario_secureval/.../x.csv``.
    Labels are taken relative to ``directory``, so the result does not depend
    on how the root path is spelled (relative, absolute, ``./`` prefix, ...).

    Args:
        directory: Root of the directory tree to scan.

    Returns:
        A DataFrame with the columns ``RQ``, ``Tool``, ``CWE``, ``Scenario``
        and ``Count lines``, one row per CSV file. ``Count lines`` holds the
        value returned by ``count_lines_in_csv`` for that file. CSV files
        with fewer than four path components below ``directory`` cannot be
        labelled; they are reported with a printed message and skipped.
    """
    data = {'RQ': [], 'Tool': [], 'CWE': [], 'Scenario': [], 'Count lines': []}

    for root, dirs, files in os.walk(directory):
        # os.walk yields entries in file-system order; sorting `dirs` in place
        # (and the file names) makes the row order reproducible across runs.
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith('.csv'):
                continue

            file_path = os.path.join(root, file_name)

            # Position labels relative to the scanned root instead of relying
            # on fixed indices into the full path string, which only line up
            # when `directory` has exactly two components.
            rel_parts = os.path.relpath(file_path, directory).split(os.sep)
            if len(rel_parts) < 4:
                print(
                    f"Skipping {file_path}: expected at least 4 path levels "
                    f"below {directory}"
                )
                continue
            rq, tool, cwe, scenario = rel_parts[:4]

            data['RQ'].append(rq)
            data['Tool'].append(tool)
            data['CWE'].append(cwe)
            data['Scenario'].append(scenario)
            data['Count lines'].append(count_lines_in_csv(file_path))

    return pd.DataFrame(data)

# Root of the directory tree scanned for CSV files; a relative path, so it is
# resolved against the kernel's current working directory.
root_directory = '../vulnerability_analysis'

# Build the per-file summary table (one row per CSV file found under the root).
result_df = process_csv_files(root_directory)

# Display the resulting DataFrame
result_df

Unnamed: 0,RQ,Tool,CWE,Scenario,Count lines
0,rq_1,chatgpt,cwe_20,scenario_cwe_context,1
1,rq_1,chatgpt,cwe_20,scenario_cwe_definition,1
2,rq_1,chatgpt,cwe_20,scenario_secureval,0
3,rq_1,chatgpt,cwe_22,scenario_cwe_context,0
4,rq_1,chatgpt,cwe_22,scenario_cwe_definition,0
5,rq_1,chatgpt,cwe_22,scenario_secureval,0
6,rq_1,chatgpt,cwe_252,scenario_cwe_context,0
7,rq_1,chatgpt,cwe_252,scenario_cwe_definition,0
8,rq_1,chatgpt,cwe_252,scenario_secureval,0
9,rq_1,chatgpt,cwe_287,scenario_cwe_context,1


In [6]:
# Let pandas render up to 140 rows before it truncates displayed frames.
pd.options.display.max_rows = 140

In [3]:
# Show the per-file summary table again.
# NOTE(review): this cell's saved execution count (3) is lower than that of the
# cell defining result_df (5) — re-run the notebook top to bottom so the
# displayed output reflects the current code.
result_df

Unnamed: 0,RQ,Tool,CWE,Scenario,Count lines
0,rq_1,chatgpt,cwe_20,scenario_cwe_context,1
1,rq_1,chatgpt,cwe_20,scenario_cwe_definition,1
2,rq_1,chatgpt,cwe_20,scenario_secureval,0
3,rq_1,chatgpt,cwe_22,scenario_cwe_context,0
4,rq_1,chatgpt,cwe_22,scenario_cwe_definition,0
5,rq_1,chatgpt,cwe_22,scenario_secureval,0
6,rq_1,chatgpt,cwe_252,scenario_cwe_context,0
7,rq_1,chatgpt,cwe_252,scenario_cwe_definition,0
8,rq_1,chatgpt,cwe_252,scenario_secureval,0
9,rq_1,chatgpt,cwe_287,scenario_cwe_context,1


In [10]:
# Fraction of rows in result_df whose "Count lines" value is greater than zero.
(result_df["Count lines"] > 0).sum()/len(result_df)

0.2826086956521739

In [11]:
# Sum of "Count lines" over all rows of result_df.
result_df["Count lines"].sum()

20

In [7]:
# Sum of "Count lines" for each distinct RQ value.
# NOTE(review): the saved per-RQ output adds up to 28 while the overall-sum
# cell shows 20, and the execution counts are out of order — the outputs
# presumably come from different kernel states; re-run top to bottom to confirm.
result_df.groupby("RQ")["Count lines"].sum()

RQ
rq_1    20
rq_2     0
rq_3     0
rq_4     8
Name: Count lines, dtype: int64