In [5]:
import pandas as pd
from io import StringIO
from sklearn.metrics import f1_score

# Map each tool id to the label(s) it emits for a reentrancy finding.
# Several labels for one tool are written as one comma-separated string,
# e.g. 'tool_name': 'label1,label2'. Insertion order matters: it is the order
# in which the tools are reported further down.
#
# Tools deliberately left out (kept for reference):
#   'manticore-0.3.7': 'Reentrancy'  -- placeholder
#   'securify2':       'Reentrancy'  -- does not work
#   'smartcheck':      'Reentrancy'  -- never finds any occurrence of reentrancy
#   'ethainter':       'Reentrancy'  -- does not work
reentrancy_labels = {
    'ccc': 'Reentrancy_Vulnerability',
    'confuzzius': 'Reentrancy',
    'conkas': 'Reentrancy',  # .sol 0.5
    'mythril-0.24.7': 'State_access_after_external_call_SWC_107',
    'oyente+-2acaf2e': 'Re_Entrancy_Vulnerability',
    'securify': 'DAO',
    'sfuzz': 'Reentrancy',
    'slither-0.11.3': 'reentrancy_eth,reentrancy_no_eth',
    'slither-0.10.4': 'reentrancy_eth,reentrancy_no_eth',
    'solhint-6.0.0': 'reentrancy',
    'ethor-2023': 'insecure',
    'oyente+-060ca34': 'Callstack_Depth_Attack_Vulnerability',
    'vandal': 'ReentrantCall',
}
# All GPT-based tools report the same label.
reentrancy_labels.update(
    dict.fromkeys(('gpt-oss', 'gpt-5-mini', 'gpt-5', 'gpt-5-nano'), 'reentrant')
)

# 1. Load the raw results, keep only the three columns used below, and derive
# the ground truth: a file counts as truly reentrant when its filename contains
# '_ree' (case-insensitive, with or without a trailing number).
df = (
    pd.read_csv('results_rs_bins.csv')[['filename', 'toolid', 'findings']]
    .assign(
        true_reentrancy=lambda frame: frame['filename'].str.contains(r'_ree', case=False)
    )
)

# 2. Determine the "predicted" reentrancy label based on the 'findings' column.
# This function will check if any of the tool-specific reentrancy labels are present in the findings.
def get_prediction(row, labels=None):
    """Return True when the row's findings contain one of its tool's reentrancy labels.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'toolid' and 'findings'.
    labels : dict, optional
        Maps a tool id to a comma-separated string of labels. Defaults to the
        module-level ``reentrancy_labels``.

    Returns
    -------
    bool
        True if any non-empty label of the row's tool occurs as a substring of
        the findings; False otherwise, including for tools without an entry.
    """
    if labels is None:
        labels = reentrancy_labels

    tool_labels = labels.get(row['toolid'])
    if not tool_labels:
        return False

    # str() so that a missing value (NaN) becomes the harmless text 'nan'.
    findings = str(row['findings'])

    # Empty labels (from a trailing or doubled comma) are skipped: '' is a
    # substring of every string and would flag every row as reentrant.
    candidates = (label.strip() for label in tool_labels.split(','))
    return any(label in findings for label in candidates if label)

# Predicted label for every (file, tool) row.
df['predicted_reentrancy'] = df.apply(get_prediction, axis=1)

# 3. Calculate metrics for each tool listed in reentrancy_labels and print the
# results for the tools that detected at least one true positive (F1 > 0).
tools_to_analyze = reentrancy_labels.keys()

print("Reentrancy Metrics per Tool:")
print("=" * 30)

working_tools = []

for tool in tools_to_analyze:

    # Rows produced by the current tool.
    tool_df = df[df['toolid'] == tool]
    actual = tool_df['true_reentrancy']
    predicted = tool_df['predicted_reentrancy']

    # Confusion-matrix counts. The explicit comparisons against True/False are
    # kept on purpose: a missing (NaN) label then counts for neither class.
    TP = int(((actual == True) & (predicted == True)).sum())
    FP = int(((actual == False) & (predicted == True)).sum())
    TN = int(((actual == False) & (predicted == False)).sum())
    FN = int(((actual == True) & (predicted == False)).sum())

    # Every ratio falls back to 0 when its denominator is zero.
    total = TP + FP + TN + FN
    accuracy = (TP + TN) / total if total > 0 else 0

    # Precision: out of all positive predictions, how many were correct?
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0

    # Recall: out of all actual positives, how many were correctly predicted?
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0

    # F1 computed directly from the counts. The result is stored in `f1` so it
    # does not shadow the `f1_score` function imported from sklearn.
    f1 = 2 * TP / (2 * TP + FP + FN) if (2 * TP + FP + FN) > 0 else 0

    if f1 > 0:
        working_tools.append(tool)
        print(f"Tool: {tool}")
        print(f"  Accuracy:  {accuracy:.4f}")
        print(f"  Precision: {precision:.4f}")
        print(f"  Recall:    {recall:.4f}")
        print(f"  F1 Score:  {f1:.4f}")
        print("-" * 30)

# Save only rows corresponding to working tools
df_filtered = df[df['toolid'].isin(working_tools)]
df_filtered.to_csv('reentrancy_metrics_data.csv', index=False)

Reentrancy Metrics per Tool:
Tool: oyente+-060ca34
  Accuracy:  0.5134
  Precision: 0.5215
  Recall:    0.5132
  F1 Score:  0.5173
------------------------------
Tool: vandal
  Accuracy:  0.5848
  Precision: 0.5894
  Recall:    0.5266
  F1 Score:  0.5563
------------------------------


In [27]:
import pandas as pd

# Load the metrics file and derive two helper columns:
#   base_contract - the filename with everything from the first '/' up to a
#                   trailing '.hex' removed (e.g. drops '/C.hex');
#   category      - the base contract name up to its second underscore.
df = pd.read_csv('reentrancy_metrics_data_all.csv').assign(
    base_contract=lambda frame: frame['filename'].str.replace(r'/.*\.hex$', '', regex=True),
    category=lambda frame: frame['base_contract'].map(
        lambda name: '_'.join(name.split('_')[:2])
    ),
)

# Unique contracts per category, plus how many there are.
contracts_by_category = df.groupby('category')['base_contract'].unique()
category_group = contracts_by_category.reset_index()
category_group['total_contracts'] = category_group['base_contract'].map(len)

# Count reentrant contracts (each contract in the given collection counts once).
def count_reentrant(contracts):
    """Return how many names in ``contracts`` contain '_ree'.

    The match is case-insensitive, the same rule the notebook uses when it
    derives ``true_reentrancy`` from filenames. Entries are converted with
    ``str()`` first, so a non-string entry (e.g. NaN) is simply not counted
    instead of raising ``TypeError``.
    """
    return sum('_ree' in str(name).lower() for name in contracts)

category_group['reentrant_contracts'] = category_group['base_contract'].map(count_reentrant)

# Misclassified rows: prediction differs from the ground truth.
label_mismatch = df['true_reentrancy'] != df['predicted_reentrancy']
misclassified = df[label_mismatch]

# Unique misclassified contracts per category.
misclassified_group = (
    misclassified.groupby('category')['base_contract'].unique().reset_index()
)
misclassified_group['misclassified_contracts'] = misclassified_group['base_contract'].map(len)

# Left-merge so categories without any misclassification are kept (count 0),
# then express the misclassified share as a percentage and rank by it.
summary = (
    category_group
    .merge(
        misclassified_group[['category', 'misclassified_contracts']],
        on='category',
        how='left',
    )
    .assign(
        misclassified_contracts=lambda frame: frame['misclassified_contracts'].fillna(0)
    )
    .assign(
        percent_misclassified=lambda frame: (
            frame['misclassified_contracts'] / frame['total_contracts'] * 100
        )
    )
    .sort_values('percent_misclassified', ascending=False)
)

summary.to_csv('category_summary_fixed.csv', index=False)


In [29]:
import pandas as pd

# Read the merged metrics file.
df = pd.read_csv("reentrancy_metrics_data_all.csv")

# Keep the rows where the prediction disagrees with the ground truth.
label_mismatch = df["true_reentrancy"] != df["predicted_reentrancy"]
misclassified = df[label_mismatch]

# For every contract filename: the sorted, de-duplicated tools that got it wrong.
tools_per_file = misclassified.groupby("filename")["toolid"].apply(
    lambda ids: sorted(set(ids))
)

# num_tools is derived while 'toolid' still holds lists; only afterwards is
# 'toolid' flattened into a comma-separated string. Most-misclassified first.
summary = (
    tools_per_file.reset_index()
    .assign(
        num_tools=lambda frame: frame["toolid"].map(len),
        toolid=lambda frame: frame["toolid"].map(", ".join),
    )
    .sort_values(by="num_tools", ascending=False)
)

# Write the result.
summary.to_csv("misclassified_contracts_summary.csv", index=False)
print("Saved misclassified_contracts_summary.csv")


Saved misclassified_contracts_summary.csv


In [33]:
import pandas as pd
import re

# ---- config ----
# INPUT_CSV : CSV read by main(); it must contain 'filename' and 'toolid' columns.
#   Change the value if your file name is different.
#   NOTE(review): a cell further down in this notebook writes a CSV with this
#   exact name; presumably that is the file consumed here, in which case that
#   cell has to be executed before this one - confirm.
# OUTPUT_CSV: CSV written by main() with the de-duplicated result.
INPUT_CSV = "misclassified_reentrant_contracts_summary.csv"  # change if your file name is different
OUTPUT_CSV = "deduplicated.csv"
# ----------------

def normalize_filename(name: str) -> str:
    """Strip every '_ree<digits>' marker from a filename.

    Missing values (anything ``pd.isna`` reports as NA) are returned
    unchanged; everything else is converted to ``str`` first.

    Examples:
      '09_ERC20DDSubMod_ree1/C.hex' -> '09_ERC20DDSubMod/C.hex'
      '16_DelegateCall_ree1.sol'    -> '16_DelegateCall.sol'
    """
    if not pd.isna(name):
        # At least one digit is required, so a bare '_ree' is left alone.
        name = re.sub(r'_ree\d+', '', str(name))
    return name

def split_tools_field(s: str):
    """Split a (possibly quoted) comma-separated tools string into tool names.

    Returns an empty list for a missing value. One pair of matching
    surrounding quotes (single or double) is removed, then the text is split
    on commas; each piece is stripped and blank pieces are dropped.
    """
    if pd.isna(s):
        return []

    text = str(s).strip()

    # Drop one layer of matching surrounding quotes, if present.
    if text[:1] == text[-1:] and text[:1] in ('"', "'"):
        text = text[1:-1]

    tools = []
    for piece in text.split(','):
        piece = piece.strip()
        if piece:
            tools.append(piece)
    return tools

def combine_tools(list_of_tool_strings):
    """Merge several tool strings into one sorted list of unique tool names.

    Each entry may itself be a comma-separated (and possibly quoted) list; it
    is split with ``split_tools_field``. Uniqueness is exact (case-sensitive).
    The result is ordered case-insensitively, with the exact spelling as a
    tie-break so that names differing only in case always come out in the
    same order (the order of a plain set is not stable across runs).
    """
    unique_tools = set()
    for entry in list_of_tool_strings:
        unique_tools.update(split_tools_field(entry))
    unique_tools.discard('')  # guard against empty names
    return sorted(unique_tools, key=lambda tool: (tool.lower(), tool))

def main():
    """Merge rows whose filenames differ only by a '_ree<digits>' marker.

    Reads INPUT_CSV (all values as plain strings), groups the rows by the
    normalized filename, unions the tools listed for each group, and writes
    filename / tools / num_tools to OUTPUT_CSV, ordered by num_tools
    (descending) and then filename.

    Raises SystemExit when the input lacks a 'filename' or 'toolid' column.
    """
    df = pd.read_csv(INPUT_CSV, dtype=str, keep_default_na=False)

    # Both columns are required.
    if not {'filename', 'toolid'}.issubset(df.columns):
        raise SystemExit("Input CSV must contain 'filename' and 'toolid' columns.")

    # Group key: the filename with its _reeN markers removed.
    df['normalized_filename'] = df['filename'].map(normalize_filename)

    # Gather every original toolid string that belongs to the same key.
    tool_strings = df.groupby('normalized_filename')['toolid'].apply(list)
    grouped = tool_strings.reset_index()

    # Union the tools of each group, count them and render them as text.
    grouped['tools_list'] = grouped['toolid'].map(combine_tools)
    grouped['num_tools'] = grouped['tools_list'].map(len)
    grouped['tools'] = grouped['tools_list'].map(', '.join)

    # Final layout and ordering.
    result = (
        grouped.rename(columns={'normalized_filename': 'filename'})
        [['filename', 'tools', 'num_tools']]
        .sort_values(by=['num_tools', 'filename'], ascending=[False, True])
    )

    result.to_csv(OUTPUT_CSV, index=False)
    print(f"Saved {OUTPUT_CSV} — {len(result)} rows (sorted by num_tools desc).")


if __name__ == "__main__":
    main()


Saved deduplicated.csv — 93 rows (sorted by num_tools desc).


In [30]:
import pandas as pd

# Read the merged metrics file.
df = pd.read_csv("reentrancy_metrics_data_all.csv")

# Restrict to contracts whose ground truth is "reentrant".
is_reentrant = df["true_reentrancy"] == True
df_reentrant = df[is_reentrant]

# Of those, keep the rows where the prediction disagrees with the ground truth.
label_mismatch = df_reentrant["predicted_reentrancy"] != df_reentrant["true_reentrancy"]
misclassified = df_reentrant[label_mismatch]

# For every contract filename: the sorted, de-duplicated tools that missed it.
tools_per_file = misclassified.groupby("filename")["toolid"].apply(
    lambda ids: sorted(set(ids))
)

# num_tools is derived while 'toolid' still holds lists; only afterwards is
# 'toolid' flattened into a comma-separated string. Most-missed first.
summary = (
    tools_per_file.reset_index()
    .assign(
        num_tools=lambda frame: frame["toolid"].map(len),
        toolid=lambda frame: frame["toolid"].map(", ".join),
    )
    .sort_values(by="num_tools", ascending=False)
)

# Write the result.
summary.to_csv("misclassified_reentrant_contracts_summary.csv", index=False)
print("Saved misclassified_reentrant_contracts_summary.csv")


Saved misclassified_reentrant_contracts_summary.csv


In [28]:
import pandas as pd

# Load the merged CSV
df = pd.read_csv("reentrancy_metrics_data_all.csv")

# A row is misclassified when the prediction disagrees with the ground truth.
df['misclassified'] = df['predicted_reentrancy'] != df['true_reentrancy']

# Category = first 2 characters of the filename.
df['category'] = df['filename'].str[:2]

# Each statistic is computed with a single grouped pass over the relevant rows
# (instead of re-scanning the whole frame once per category).

# Number of distinct contracts per category.
total_contracts = df.groupby('category')['filename'].nunique()

# Distinct contracts whose filename carries the '_ree' marker. The match is
# case-insensitive, the same rule used when true_reentrancy is derived from
# filenames; na=False keeps a missing filename from breaking the mask.
has_ree_marker = df['filename'].str.contains('_ree', case=False, na=False)
reentrant_contracts = df[has_ree_marker].groupby('category')['filename'].nunique()

# Distinct contracts that at least one tool misclassified, in order of appearance.
misclassified_files = df[df['misclassified']].groupby('category')['filename'].unique()

# Assemble one row per category; categories absent from a partial result get
# 0 (counts) or an empty string (list).
categories = total_contracts.index
category_summary = (
    pd.DataFrame({
        'total_contracts': total_contracts,
        'reentrant_contracts': reentrant_contracts.reindex(categories, fill_value=0).astype(int),
        'misclassified_contracts': misclassified_files.map(len).reindex(categories, fill_value=0).astype(int),
        # List rendered as a comma-separated string for the CSV.
        'misclassified_list': misclassified_files.map(', '.join).reindex(categories, fill_value=''),
    })
    .rename_axis('category')
    .reset_index()
)

# Add percentage of misclassified contracts
category_summary['percent_misclassified'] = (
    category_summary['misclassified_contracts'] / category_summary['total_contracts'] * 100
).round(2)

# Reorder columns: percent_misclassified goes before misclassified_list
category_summary = category_summary[[
    'category',
    'total_contracts',
    'reentrant_contracts',
    'misclassified_contracts',
    'percent_misclassified',
    'misclassified_list',
]]

# Sort by percent misclassified descending
category_summary = category_summary.sort_values(by='percent_misclassified', ascending=False)

# Save to CSV
category_summary.to_csv("reentrancy_metrics_by_category.csv", index=False)

print("Saved sorted category-level summary to reentrancy_metrics_by_category.csv")


Saved sorted category-level summary to reentrancy_metrics_by_category.csv
