In [70]:
import pandas as pd
from io import StringIO
from sklearn.metrics import f1_score


# Read data from the CSV file and select only the required columns.
filename = 'results_all_bins.csv'

if 'src' in filename:
    out_csv = 'reentrancy_metrics_data_src.csv'
    latex_file = 'latex_table_src.csv'
    aggregated_file = 'aggregated_results_src.csv'
else:
    out_csv = 'reentrancy_metrics_data_bins.csv'
    latex_file = 'latex_table_bins.csv'
    aggregated_file = 'aggregated_results_bins.csv'
df = pd.read_csv(filename)
df = df[['filename', 'basename', 'exit_code', 'toolid', 'findings']]

df['exit_code'] = df['exit_code'].fillna(-1).astype(int)
#print(df.shape)
df = df[~((df['toolid'] == 'ethor-2023') & (df['findings'] == '{}'))]
df = df[df['basename'].str.contains('_safe|_ree', na=False)]

#print(df.shape)
# A dictionary mapping each tool to the string(s) it produces for a reentrancy finding.
# You can easily update this dictionary as needed. For tools with multiple labels,
# use a comma-separated string, e.g., 'tool_name': 'label1,label2'.
reentrancy_labels = {
    'ccc': 'Reentrancy_Vulnerability',
    'confuzzius': 'Reentrancy',
    'conkas': 'Reentrancy', #.sol 0.5
    #'manticore-0.3.7': 'Reentrancy', # placeholder
    'mythril-0.24.7': 'State_access_after_external_call_SWC_107',
    'oyente+-2acaf2e': 'Re_Entrancy_Vulnerability',
    'securify': 'DAO', 
    'securify2': 'Reentrancy', # does not work
    'sfuzz': 'Reentrancy', 
    'slither-0.11.3': 'reentrancy_eth,reentrancy_no_eth',
    #'smartcheck': 'Reentrancy', # never finds any occurrence of reentrancy
    'solhint-6.0.0': 'reentrancy',
    #'ethainter': 'Reentrancy', # does not work
    'ethor-2023': 'insecure',
    'oyente+-060ca34':'Re_Entrancy_Vulnerability',
    'vandal': 'ReentrantCall',
    'gpt-oss': 'reentrant',
    'gpt-5-mini': 'reentrant',
    'gpt-5': 'reentrant',
    'gpt-5-nano': 'reentrant'
    }

# 1. Determine the "true" reentrancy label for each file based on its filename.
# 'ree' followed by an optional number indicates a true reentrancy vulnerability.
# df['true_reentrancy'] = df['filename'].str.contains(r'ree\d*\.sol', case=False)
df['true_reentrancy'] = df['basename'].str.contains(r'_ree', case=False)

# 2. Determine the "predicted" reentrancy label based on the 'findings' column.
# This function will check if any of the tool-specific reentrancy labels are present in the findings.
def get_prediction(row):
    tool_id = row['toolid']
    findings = str(row['findings']) # Convert to string to handle potential NaN values

    # Check if the tool is in our labels dictionary.
    if tool_id in reentrancy_labels:
        # Split the tool's finding string into a list of individual labels.
        tool_findings = [f.strip() for f in reentrancy_labels[tool_id].split(',')]
        
        # Check if any of the tool's labels are present in the findings from the data.
        for label in tool_findings:
            if label in findings:
                return True
    return False

df['predicted_reentrancy'] = df.apply(get_prediction, axis=1)
#print(df['exit_code']==1)


# Save the DataFrame to a new CSV file.
df.to_csv(out_csv, index=False)

# 3. Calculate metrics for each unique tool and print only the results.
# Analyze only the tools present in the reentrancy_labels dictionary.
tools_to_analyze = reentrancy_labels.keys()

print("Reentrancy Metrics per Tool:")
print("=" * 30)
versions = ['0_8', '0_5', '0_4']


for version in versions:
   
    version_df = df[df['filename'].str.contains(version)]
    print('*' * 100)
    print('Results for Solidity version:', version.replace('_', '.'))
    print('*' * 100)
    for tool in tools_to_analyze:

        # Filter the DataFrame for the current tool.
        tool_df = version_df[version_df['toolid'] == tool]
        n_results = tool_df.shape[0]

        ERRORS = (tool_df['exit_code'] != 0).sum()

        ERRORS2  = tool_df[(tool_df['exit_code'] != 0) & (tool_df['findings'] == '{}')].shape[0]/n_results if n_results >0 else 0
        # Exclude rows where exit_code is not

        

        tool_df = tool_df[(tool_df['exit_code'] == '0') | ((tool_df['exit_code'] != 0) & tool_df['findings']!= '{}')]

        #print( tool_df['findings'], tool_df['findings']!= '{}')
        #tool_df = tool_df[tool_df['exit_code'] == '0']
        # if tool == 'vandal':
        #     print(tool_df, tool_df.shape)

        # Calculate True Positives (TP), False Positives (FP), True Negatives (TN), and False Negatives (FN).
        TP = len(tool_df[(tool_df['true_reentrancy'] == True) & (tool_df['predicted_reentrancy'] == True)])
        FP = len(tool_df[(tool_df['true_reentrancy'] == False) & (tool_df['predicted_reentrancy'] == True)])
        TN = len(tool_df[(tool_df['true_reentrancy'] == False) & (tool_df['predicted_reentrancy'] == False)])
        FN = len(tool_df[(tool_df['true_reentrancy'] == True) & (tool_df['predicted_reentrancy'] == False)])
        # print(TP, FP, TN, FN)
        
        # Calculate Accuracy, Precision, and Recall.
        # Handle cases where the denominator is zero to avoid errors.
        accuracy = (TP + TN) / (TP + FP + TN + FN) if (TP + FP + TN + FN) > 0 else 0
        
        # Precision: Out of all positive predictions, how many were correct?
        precision = TP / (TP + FP) if (TP + FP) > 0 else 0
        
        # Recall: Out of all actual positives, how many were correctly predicted?
        recall = TP / (TP + FN) if (TP + FN) > 0 else 0

        # Calculate the F1 Score
        f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        f1_score = 2 * TP / (2 * TP + FP + FN) if (2 * TP + FP + FN) > 0 else 0
        #f1_score = f1_score(tool_df['true_reentrancy'], tool_df['predicted_reentrancy'], zero_division=0, average = 'weighted')
        if f1_score > 0:
            
            print(f"Tool: {tool}")
            print(f"  Accuracy:  {accuracy*100:.2f}")
            print(f"  Precision: {precision*100:.2f}")
            print(f"  Recall:    {recall*100:.2f}")
            print(f"  F1 Score:  {f1_score*100:.2f}")
            #print(f"  Errors: {ERRORS}")
            print(f"  Errors: {ERRORS2*100:.2f}")
            print("-" * 30)


Reentrancy Metrics per Tool:
****************************************************************************************************
Results for Solidity version: 0.8
****************************************************************************************************
Tool: vandal
  Accuracy:  52.94
  Precision: 50.00
  Recall:    89.06
  F1 Score:  64.04
  Errors: 13.97
------------------------------
****************************************************************************************************
Results for Solidity version: 0.5
****************************************************************************************************
Tool: vandal
  Accuracy:  53.03
  Precision: 49.56
  Recall:    91.80
  F1 Score:  64.37
  Errors: 11.36
------------------------------
****************************************************************************************************
Results for Solidity version: 0.4
********************************************************************************************

In [None]:
# from pathlib import Path

# # Load your CSV
# df = pd.read_csv(out_csv)

# # 1. Keep only rows where basename contains '_ree'
# df = df[df['basename'].str.contains('_ree', na=False)]

# # 2. Extract folder path from filename (everything except the last component)
# df['path'] = df['filename'].apply(lambda x: str(Path(x).parent))

# # 3. Compute TP, FP, FN, TN flags
# df['TP'] = (df['true_reentrancy'] & df['predicted_reentrancy'])
# df['FP'] = (~df['true_reentrancy'] & df['predicted_reentrancy'])
# df['FN'] = (df['true_reentrancy'] & ~df['predicted_reentrancy'])
# df['TN'] = (~df['true_reentrancy'] & ~df['predicted_reentrancy'])

# # Aggregate by both path and tool_id, include total count
# agg = (
#     df.groupby(['path', 'toolid'])
#       .agg(
#           TP=('TP', 'sum'),
#           FP=('FP', 'sum'),
#           FN=('FN', 'sum'),
#           TN=('TN', 'sum'),
#           total_rows=('filename', 'count')
#       )
#       .reset_index()
# )


# # 5. Save to CSV
# agg.to_csv(aggregated_file, index=False)

In [74]:
import pandas as pd
from pathlib import Path
import os

# ---------- Configuration ----------
INPUT_CSV = out_csv  # change to your actual CSV file
OUTPUT_PREFIX = "results_"   # prefix for generated .tex files


# ---------- Load and filter data ----------
df = pd.read_csv(INPUT_CSV)

# Keep only rows where basename contains '_ree'
df = df[df['basename'].str.contains('_ree', na=False)]

# Extract folder path (directory of each file)
if '_bins' in INPUT_CSV:
    # Keep only the parent folder up to the desired level (drop the last subdir)
    df['path'] = df['filename'].apply(lambda x: str(Path(x).parent.parent))
else:
    df['path'] = df['filename'].apply(lambda x: str(Path(x).parent))

# Keep forward slashes (safe for LaTeX)
df['path'] = df['path'].str.replace('\\', '/', regex=False)

# Extract version number (e.g., 0_4, 0_5, 0_8) from the path
df['version'] = df['path'].str.extract(r'[/\\](\d_\d)[/\\]')
print(df['version'])
# ---------- Compute results ----------
# Correct if tool predicted reentrancy correctly
df['correct'] = (df['true_reentrancy'] & df['predicted_reentrancy'])

# Group by version, tool, and path
agg = (
    df.groupby(['version', 'toolid', 'path'])
      .agg(correct=('correct', 'sum'), total=('filename', 'count'))
      .reset_index()
)

# Create string like "1/3"
agg['score'] = agg['correct'].astype(str) + '/' + agg['total'].astype(str)
# Keep only tools that found at least one true positive
tools_with_tp = agg.groupby('toolid')['correct'].sum()
#print(tools_with_tp)
tools_to_keep = tools_with_tp[tools_with_tp > 0].index
agg = agg[agg['toolid'].isin(tools_to_keep)]



for ver, subdf in agg.groupby('version'):
    print(ver)
    # Pivot: rows = tools, columns = paths
    table = (
        subdf.pivot(index='toolid', columns='path', values='score')
             .fillna('-')
             .sort_index(axis=1)
    )

    # Convert Index to list of strings
    cols = list(table.columns)

    # Compute the common prefix of all columns
    common_prefix = os.path.commonprefix(cols)

    # Remove trailing slash if any
    if common_prefix.endswith('/') or common_prefix.endswith('\\'):
        common_prefix = common_prefix[:-1]

    # Remove the common prefix from each column
    rotated_cols = [
        "\\rotatebox{90}{" + col[len(common_prefix):].lstrip('/\\').replace('_', r'\_') + "}"
        for col in cols
    ]
    table.columns = rotated_cols


    # Generate LaTeX
    latex = table.to_latex(
        escape=False,
        index=True,
        caption=f"Reentrancy detection results for Solidity {ver}",
        label=f"tab:{ver}",
        longtable=False
    )

    # Save to file

    output_file = f"{OUTPUT_PREFIX}{ver}.tex"
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(latex)

    print(f"Saved LaTeX table for {ver} → {output_file}")
    print(f"→ Removed common prefix and version: '{version_prefix}'")

print("✅ Done.")


0       0_4
1       0_5
10      0_4
11      0_5
12      0_4
       ... 
2758    0_8
2759    0_8
2760    0_8
2765    0_8
2770    0_8
Name: version, Length: 1288, dtype: object
0_4
Saved LaTeX table for 0_4 → results_0_4.tex
→ Removed common prefix and version: 'handcrafted_tests/src/0_8/0_8'
0_5
Saved LaTeX table for 0_5 → results_0_5.tex
→ Removed common prefix and version: 'handcrafted_tests/src/0_8/0_8'
0_8
Saved LaTeX table for 0_8 → results_0_8.tex
→ Removed common prefix and version: 'handcrafted_tests/src/0_8/0_8'
✅ Done.


In [None]:
# import pandas as pd
# from pathlib import Path
# import os

# # ---------- Configuration ----------
# INPUT_CSV = out_csv  # change to your actual CSV file
# OUTPUT_PREFIX = "results_"   # prefix for generated .tex files


# # ---------- Load and filter data ----------
# df = pd.read_csv(INPUT_CSV)

# # Keep only rows where basename contains '_ree'
# df = df[df['basename'].str.contains('_ree', na=False)]

# # Extract folder path (directory of each file)
# df['path'] = df['filename'].apply(lambda x: str(Path(x).parent))

# # Keep forward slashes (safe for LaTeX)
# df['path'] = df['path'].str.replace('\\', '/', regex=False)

# # Extract version number (e.g., 0_4, 0_5, 0_8) from the path
# df['version'] = df['path'].str.extract(r'[/\\](\d_\d)[/\\]')

# # ---------- Compute results ----------
# # Correct if tool predicted reentrancy correctly
# df['correct'] = (df['true_reentrancy'] & df['predicted_reentrancy'])

# # Group by version, tool, and path
# agg = (
#     df.groupby(['version', 'toolid', 'path'])
#       .agg(correct=('correct', 'sum'), total=('filename', 'count'))
#       .reset_index()
# )

# # Create string like "1/3"
# agg['score'] = agg['correct'].astype(str) + '/' + agg['total'].astype(str)
# # Keep only tools that found at least one true positive
# tools_with_tp = agg.groupby('toolid')['correct'].sum()
# tools_to_keep = tools_with_tp[tools_with_tp > 0].index
# agg = agg[agg['toolid'].isin(tools_to_keep)]


# for ver, subdf in agg.groupby('version'):
#     # Pivot: rows = tools, columns = paths
#     table = (
#         subdf.pivot(index='toolid', columns='path', values='score')
#              .fillna('-')
#              .sort_index(axis=1)
#     )

#     # Convert Index to list of strings
#     cols = list(table.columns)

#     # Compute the common prefix of all columns
#     common_prefix = os.path.commonprefix(cols)

#     # Remove trailing slash if any
#     if common_prefix.endswith('/') or common_prefix.endswith('\\'):
#         common_prefix = common_prefix[:-1]

#     # Remove the common prefix from each column
#     rotated_cols = [
#         "\\rotatebox{90}{" + col[len(common_prefix):].lstrip('/\\').replace('_', r'\_') + "}"
#         for col in cols
#     ]
#     table.columns = rotated_cols

#     # Generate LaTeX table as string
#     latex_body = table.to_latex(
#         escape=False,
#         index=True
#     )

#     # Wrap with resizebox and add caption/label manually
#     latex = (
#         "\\begin{table}\n"
#         f"\\caption{{Reentrancy detection results for Solidity {ver}}}\n"
#         f"\\label{{tab:{ver}}}\n"
#         "\\resizebox{\\textwidth}{!}{%\n"
#         f"{latex_body}"
#         "}\n"
#         "\\end{table}\n"
#     )

#     # Save to file
#     output_file = f"{OUTPUT_PREFIX}{ver}.tex"
#     with open(output_file, "w", encoding="utf-8") as f:
#         f.write(latex)

#     print(f"Saved LaTeX table for {ver} → {output_file}")

# print("✅ Done.")


Saved LaTeX table for 0_4 → results_0_4.tex
Saved LaTeX table for 0_5 → results_0_5.tex
✅ Done.
