In [2]:
import pandas as pd

# Load the dataset.
# NOTE(review): absolute, machine-specific path; this cell only runs where
# that file exists.
file_path = "/home/riddhi/FlakyXbert/MAIN/data/IDoFT_data/Flakify_IDoFT_dataset.csv"
data = pd.read_csv(file_path)

# Count rows per project and keep only the projects with more than 30 entries
project_counts = data['project'].value_counts()
filtered_projects = project_counts[project_counts > 30]

# Restrict the data to the selected projects
filtered_data = data[data['project'].isin(filtered_projects.index)]

# Count rows per ('project', 'flaky') pair and pivot the 'flaky' values into
# columns (0 where a project has no rows for a value). 'project' is turned
# back into a regular column so it can serve as the merge key.
category_counts = (
    filtered_data
    .groupby(['project', 'flaky'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)

# Two-column frame: 'project', 'project_count'.
# The axis and the value column are named explicitly instead of relying on
# the names value_counts() happens to assign, which differ between pandas
# versions (older releases leave the index unnamed, so a plain reset_index()
# would produce an 'index' column and the merge on 'project' would fail).
project_counts_df = (
    filtered_projects
    .rename_axis('project')
    .reset_index(name='project_count')
)

# Left-merge so every selected project keeps its row, in project_count order
result_df = pd.merge(project_counts_df, category_counts, on='project', how='left')

# Print the resulting DataFrame
print(result_df)

                   project  project_count    0    1
0         junit-quickcheck            250  119  131
1                    dubbo            186   15  171
2                   hadoop            149    3  146
3                     nifi            146    7  139
4             ormlite-core            114    1  113
5                  admiral            113    4  109
6                 fastjson            109   45   64
7   adyen-java-api-library             89   44   45
8                  wildfly             85    1   84
9                   Mapper             76    1   75
10       spring-data-r2dbc             68   31   37
11          Chronicle-Wire             63    3   60
12    typescript-generator             60    0   60
13          Java-WebSocket             54    0   54
14                 biojava             52    1   51
15                   hbase             52    2   50
16             spring-boot             48    0   48
17                visualee             47    0   47
18      inno

In [2]:
# Define F1 scores category wise per project (hard-coded values, keyed by a
# short project name)
f1_scores = {
    'dubbo': 70.71,
    'hadoop': 57.95,
    'nifi': 96.52,
    'junit': 96.62,
    'admiral': 61.72,
    'wildfly': 73.85,
    'mapper': 100,
    'fast': 78.46,
    'java': 82.12,
    'biojava': 90.75,
    'spring': 88.88,
    'hbase': 72.96,
    'hateoas': 100,
    'hive': 100,
    'esper': 100,
    'nacos': 59.52
}

# Define support values: the per-project weights used for the weighted
# average below (same keys as f1_scores)
supports = {
    'dubbo': 170,
    'hadoop': 146,
    'nifi': 139,
    'junit': 131,
    'admiral': 109,
    'wildfly': 84,
    'mapper': 75,
    'fast': 64,
    'java': 54,
    'biojava': 51,
    'spring': 48,
    'hbase': 47,
    'hateoas': 41,
    'hive': 41,
    'esper': 38,
    'nacos': 32
}

# Two numbers per project.
# NOTE(review): presumably [precision, recall] in that order; confirm.
# This dict is not used by the calculation below, and its keys differ from
# f1_scores ('ormlite' appears only here; 'hateoas' and 'esper' are absent).
precision_recall = {
    'dubbo': [71,71],
    'hadoop': [52,50],
    'nifi': [93,90],
    'junit': [94,94],
    'ormlite': [96,96],
    'admiral': [62,64],
    'wildfly': [73,76],
    'mapper': [100,100],
    'fast': [88,77],
    'java': [88,82],
    'biojava': [92,91],
    'spring': [91,89],
    'hbase': [73,78],
    'hive': [100,100],
    'nacos': [97,96]
}

# Calculate the weighted F1 score.
# The denominator is summed over the same projects as the numerator, so a
# project listed in `supports` without an F1 score cannot silently pull the
# average down. A project with a score but no support still raises KeyError.
total_support = sum(supports[project] for project in f1_scores)
weighted_f1_sum = sum(f1_scores[project] * supports[project] for project in f1_scores)

weighted_f1_score = weighted_f1_sum / total_support

print(f"Weighted F1 Score: {weighted_f1_score:.2f}")


Weighted F1 Score: 85.82


project  project_count   0   1   2    3    4   5
0                    dubbo            170   9  19  66    7   12  57
1                   hadoop            146   0  22  85   31    8   0
2                     nifi            139   0   0  28  111    0   0
3         junit-quickcheck            131   0   0   2    7  122   0
4             ormlite-core            113   0   0  90   23    0   0
5                  admiral            109   0   7   2   75    5  20
6                  wildfly             84   0   0  43   30    1  10
7                   Mapper             75   0   0  70    5    0   0
8                 fastjson             64   2   3  16   43    0   0
9     typescript-generator             60   0   0   0   60    0   0
10          Chronicle-Wire             59   0   0   2   57    0   0
11          Java-WebSocket             54  33  21   0    0    0   0
12                 biojava             51   0  28   0   23    0   0
13             spring-boot             48   0   0  20    7   21   0
14                visualee             47   0   0  47    0    0   0
15                   hbase             47   0   1  27    4   13   2
16      innodb-java-reader             45   0   0   0   45    0   0
17  adyen-java-api-library             45   0   0   0   45    0   0
18          spring-hateoas             41   0   0   0   41    0   0
19                    hive             41   0   0  19   22    0   0
20       DataflowTemplates             39   0   0   0   39    0   0
21                   esper             38   1   0   1   36    0   0
22       spring-data-r2dbc             37   0   0   0   37    0   0
23           openhtmltopdf             35   0   0  35    0    0   0
24                   nacos             32   0   0  24    8    0   0

In [5]:
# Define F1 scores binary project wise (hard-coded values, keyed by a short
# project name)
f1_scores = {
    'dubbo': 88.74,
    'hadoop': 95.02,
    'nifi': 91.57,
    'junit': 94,
    'admiral': 91.30,
    'fast': 91.30,
    'spring': 100,
    'adyen': 30,
    'mockserver':100,
    'commons': 100,
}

# Define support values: the per-project weights used for the weighted
# average below (same keys as f1_scores)
supports = {
    'dubbo': 186,
    'hadoop': 149,
    'nifi': 146,
    'junit': 250,
    'admiral': 113,
    'fast': 109,
    'spring': 68,
    'adyen': 89,
    'mockserver':39,
    'commons':35
}

# Two numbers per project.
# NOTE(review): presumably [precision, recall] in that order; confirm.
# This dict is not used by the calculations below.
precision_recall = {
    'dubbo': [92,87],
    'hadoop': [93,97],
    'nifi': [93,90],
    'junit': [94,94],
    'admiral': [91,91],
    'fast': [91,91],
    'spring': [100,100],
    'adyen': [24,44],
    'mockserver':[100,100],
    'commons':[100,100]
}

# Calculate the weighted F1 score.
# The denominator is summed over the same projects as the numerator, so a
# project listed in `supports` without an F1 score cannot silently pull the
# average down. A project with a score but no support still raises KeyError.
total_support = sum(supports[project] for project in f1_scores)
weighted_f1_sum = sum(f1_scores[project] * supports[project] for project in f1_scores)
weighted_f1_score = weighted_f1_sum / total_support

# Calculate the non-weighted F1 score (plain mean: every project counts equally)
non_weighted_f1_score = sum(f1_scores.values()) / len(f1_scores)

print(f"Weighted F1 Score: {weighted_f1_score:.2f}")
print(f"Non-Weighted F1 Score: {non_weighted_f1_score:.2f}")


Weighted F1 Score: 88.40
Non-Weighted F1 Score: 88.19
