In [9]:
import os
import glob
import pandas as pd


In [10]:

def read_all_csv_files(folder_path):
    # Use os.walk to traverse through the directory structure recursively

    all_data = []
    for root, dirs, files in os.walk(folder_path):
        # Use glob to filter only CSV files
        csv_files = glob.glob(os.path.join(root, '*.csv'))
        
        # Iterate over each CSV file and read it using pandas

        for csv_file in csv_files:
            if "all_data" in csv_file:
                continue
            try:
                project_name = csv_file.split('/')[1]
                result_type = csv_file.split('/')[-1].split('.')[0]
                additional = ''
                if 'Security' in result_type:
                    additional = '_'.join(result_type.split('_')[1:])
                    result_type = 'Security'
                
                print(project_name, result_type, additional)

                # Read CSV file into a pandas DataFrame
                df = pd.read_csv(csv_file)

                # Check every row and see if the file name is in the list of Java files
                # If it is, then write the row to a new CSV file
                for index, row in df.iterrows():
                    all_data.append([project_name, result_type, additional] + [str(x) for x in row])
                            
                
            except Exception as e:
                # Handle any errors that may occur during reading
                print(f"Error reading {csv_file}: {e}")

    return all_data

In [11]:
folder_path = './Output/'
all_data = read_all_csv_files(folder_path)

Output Security CWE-352
Error reading ./Output/08649025-20e3-42e6-abba-32c29722dc0c_0/Security_CWE-352.csv: No columns to parse from file
Output Security CWE-347
Error reading ./Output/08649025-20e3-42e6-abba-32c29722dc0c_0/Security_CWE-347.csv: No columns to parse from file
Output Security CWE-421
Error reading ./Output/08649025-20e3-42e6-abba-32c29722dc0c_0/Security_CWE-421.csv: No columns to parse from file
Output Security CWE-780
Error reading ./Output/08649025-20e3-42e6-abba-32c29722dc0c_0/Security_CWE-780.csv: No columns to parse from file
Output Language_Abuse 
Error reading ./Output/08649025-20e3-42e6-abba-32c29722dc0c_0/Language_Abuse.csv: No columns to parse from file
Output Performance 
Error reading ./Output/08649025-20e3-42e6-abba-32c29722dc0c_0/Performance.csv: No columns to parse from file
Output Security CWE-190
Error reading ./Output/08649025-20e3-42e6-abba-32c29722dc0c_0/Security_CWE-190.csv: No columns to parse from file
Output Security CWE-807
Error reading ./Output

In [12]:
df = pd.DataFrame(all_data, columns=["Project_name",	"Smell_Type",	"Extra_Info",	"Message",	"Message_Description",	"Message_Type",	"Project_specific_info",	"Path",	"S_Line",	"S_Col",	"E_Line",	"E_Col"])
df.to_csv('all_data.csv', index=False)

In [13]:
df = pd.read_csv('all_data.csv')
df.head()

Unnamed: 0,Project_name,Smell_Type,Extra_Info,Message,Message_Description,Message_Type,Project_specific_info,Path,S_Line,S_Col,E_Line,E_Col
0,Output,Advisory,,Missing Javadoc for public method or constructor,A public method or constructor that does not h...,recommendation,This method does not have a non-trivial Javado...,/src/main/java/Main.java,6,24,6,27
1,Output,Advisory,,Missing Javadoc for public method or constructor,A public method or constructor that does not h...,recommendation,This method does not have a non-trivial Javado...,/src/main/java/Main.java,5,24,5,27
2,Output,Advisory,,Missing Javadoc for public method or constructor,A public method or constructor that does not h...,recommendation,This constructor does not have a non-trivial J...,/src/main/java/Main.java,23,12,23,18
3,Output,Advisory,,Non-final immutable field,A field of immutable type that is assigned to ...,recommendation,This immutable field is not declared final but...,/src/main/java/Main.java,21,20,21,27
4,Output,Advisory,,Non-final immutable field,A field of immutable type that is assigned to ...,recommendation,This immutable field is not declared final but...,/src/main/java/Main.java,20,20,20,26


In [14]:
smell_group = df.groupby(['Smell_Type'])

In [25]:
data = []
for message, group in smell_group:
    unique_messages = group['Message'].unique()
    print('Type:', message[0])
    print('Number of instances:', len(group))
    print('Number of unique messages:', len(unique_messages))
    print("-"*20)
    messages_group = group.groupby(['Message'])
    # sort by number of instances
    messages_group = sorted(messages_group, key=lambda x: len(x[1]), reverse=True)
    for name, group in messages_group:
        # print(name[0], len(group))
        data.append([message[0],name[0], len(group)])

    print("#"*20)


Type: Advisory
Number of instances: 122
Number of unique messages: 8
--------------------
####################
Type: Architecture
Number of instances: 8
Number of unique messages: 1
--------------------
####################
Type: DeadCode
Number of instances: 36
Number of unique messages: 4
--------------------
####################
Type: Security
Number of instances: 2
Number of unique messages: 2
--------------------
####################
Type: Violations_of_Best_Practice
Number of instances: 42
Number of unique messages: 6
--------------------
####################


In [24]:
with open('data.txt', 'w') as f:
    for item in data:
        item = ",".join([str(x) for x in item])
        f.write("%s\n" % item)