In [89]:
import csv
import json
import os
from collections import defaultdict

from tqdm import tqdm

In [90]:
# Root of the explanation outputs; the two experiment folders below live under it.
explanations_folder = os.path.join("..", "explanations")
# Baseline run: scanned as <model>/<groundtruth>/<contract>.sol.json.
BASELINE_FOLDER = os.path.join(explanations_folder, "baseline")
# XRAG run: scanned as <model>/<data_type>/<groundtruth>/<contract_id>/classification.json.
XRAG_FOLDER = os.path.join(explanations_folder, "models_comparison")

# Contract sources are read from <groundtruth>/<contract_id>.sol under this folder.
DATASET_SOURCE_FOLDER = "../dataset/manually-verified-test/source"
# Destination of the per-contract JSON records written by process_results.
MISCLASSIFICATION_ANALYSIS_FOLDER = "logs/misclassified"

# Intermediate JSON artefacts exchanged between the sections of this notebook.
MISCLASSIFIED_UI_FILE = "logs/misclassified_contracts_union_intersection.json"
MISCLASSIFIED_BASELINE_FILE = "logs/misclassified_contracts_baseline.json"
MISCLASSIFIED_XRAG_FILE = "logs/misclassified_contracts_xrag.json"

# CSV produced by save_to_csv and re-read by extract_contracts_from_csv.
CSV_OUTPUT_FILE = "logs/misclassified_form.csv"
# Only these model folders are included by gather_evaluation_data.
MODELS = ["o3-mini"]

# Destination of the .sol files written by extract_contracts_from_csv.
QUESTIONNAIRE_FOLDER = "logs/misclassified_questionnaire"

# Every output path above is under logs/, so create it up front.
os.makedirs("logs", exist_ok=True)

# Baseline

In [91]:
def initialize_misclassified():
    """Build a fresh, zeroed statistics record for one model.

    Returns:
        dict: integer counters ``total``, ``reentrant``, ``safe``,
        ``misclassified_reentrant`` and ``misclassified_safe`` (all 0) plus
        an empty ``misclassified_contracts`` list. A new list is created on
        every call, so records never share state.
    """
    counter_names = (
        "total",
        "reentrant",
        "safe",
        "misclassified_reentrant",
        "misclassified_safe",
    )
    record = dict.fromkeys(counter_names, 0)
    record["misclassified_contracts"] = []
    return record


def get_models(base_path):
    """List the names of the sub-directories found directly under ``base_path``.

    Args:
        base_path: directory whose immediate children are inspected.

    Returns:
        list[str]: entry names that are directories, in ``os.listdir`` order.
    """
    model_names = []
    for entry in os.listdir(base_path):
        if os.path.isdir(os.path.join(base_path, entry)):
            model_names.append(entry)
    return model_names


def process_contract_file(contract_path, groundtruth, misclassified_model):
    """Compare one baseline classification file against its ground truth.

    The file is expected to hold a JSON object with a ``classification``
    entry. The label is compared case-insensitively, ignoring surrounding
    whitespace. A missing, ``null`` or non-string label is treated as an
    empty prediction and therefore recorded as a misclassification, instead
    of raising and being silently counted as correct.

    Args:
        contract_path: path of the ``<contract_id>.sol.json`` file.
        groundtruth: expected label (``"reentrant"`` or ``"safe"``).
        misclassified_model: statistics record of the model, updated in
            place (``misclassified_<groundtruth>`` counter and the
            ``misclassified_contracts`` list).

    Returns:
        None. If the file cannot be read or parsed, the error is printed and
        the record is left untouched.
    """
    try:
        with open(contract_path, 'r') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and decoding errors.
        print(f"Error processing {contract_path}: {e}")
        return

    # Strip the suffix only at the end of the name, never in the middle.
    file_name = os.path.basename(contract_path)
    suffix = ".sol.json"
    contract_id = file_name[:-len(suffix)] if file_name.endswith(suffix) else file_name

    raw_label = data.get("classification") if isinstance(data, dict) else None
    predicted_label = raw_label.strip().lower() if isinstance(raw_label, str) else ""

    if predicted_label != groundtruth:
        misclassified_model[f"misclassified_{groundtruth}"] += 1
        misclassified_model["misclassified_contracts"].append({
            "contract_id": contract_id,
            "groundtruth": groundtruth,
            "predicted": predicted_label
        })


def process_model(model, base_path, misclassified):
    """Collect baseline statistics for one model folder.

    For each ground-truth sub-folder (``reentrant`` and ``safe``) that
    exists under ``<base_path>/<model>``, every ``*.json`` file is counted
    and handed to ``process_contract_file``.

    Args:
        model: name of the model folder.
        base_path: folder containing the model folders.
        misclassified: mapping of model name to statistics record; the entry
            for ``model`` is (re)initialised and then filled in place.
    """
    model_stats = initialize_misclassified()
    misclassified[model] = model_stats
    model_dir = os.path.join(base_path, model)

    for label in ('reentrant', 'safe'):
        label_dir = os.path.join(model_dir, label)
        if os.path.isdir(label_dir):
            json_files = [name for name in os.listdir(label_dir) if name.endswith(".json")]
            file_count = len(json_files)
            model_stats[label] += file_count
            model_stats["total"] += file_count

            progress = tqdm(json_files, desc=f"Checking {model} in {label}", leave=False)
            for name in progress:
                process_contract_file(os.path.join(label_dir, name), label, model_stats)


def load_classification_results(base_path):
    """Gather baseline misclassification statistics for every model folder.

    Args:
        base_path: folder whose sub-directories are the models to process.

    Returns:
        defaultdict: model name mapped to its statistics record.
    """
    results = defaultdict(initialize_misclassified)

    progress = tqdm(get_models(base_path), desc=f"Processing models in {base_path}", leave=False)
    for model_name in progress:
        process_model(model_name, base_path, results)

    return results


def save_misclassified_results(misclassified, output_file=None):
    """Write the misclassification record to a JSON file.

    Args:
        misclassified: mapping of model name to statistics record.
        output_file: destination path. Defaults to
            ``MISCLASSIFIED_BASELINE_FILE`` when omitted, which is the
            previous hard-coded behaviour.
    """
    target = MISCLASSIFIED_BASELINE_FILE if output_file is None else output_file
    with open(target, 'w') as f:
        json.dump(misclassified, f, indent=4)
    print(f"Misclassified contract IDs saved to {target}")


def print_misclassification_stats(misclassified):
    """Print a per-model summary of the baseline misclassification counters.

    Args:
        misclassified: mapping of model name to statistics record, as built
            by ``load_classification_results``.
    """
    print("\nMisclassification Statistics:")
    for model, stats in misclassified.items():
        misclassified_count = len(stats['misclassified_contracts'])
        report = [
            f"Model: {model}",
            f"  Total Contracts: {stats['total']}",
            f"  Reentrant: {stats['reentrant']}, Safe: {stats['safe']}",
            f"  Misclassified Reentrant: {stats['misclassified_reentrant']}, "
            f"Misclassified Safe: {stats['misclassified_safe']}",
            f"  Misclassified Contracts: {misclassified_count}",
        ]
        print("\n".join(report))


In [92]:
# Scan the baseline outputs, persist the per-model record and print a summary.
misclassified_results = load_classification_results(BASELINE_FOLDER)
save_misclassified_results(misclassified_results)
print_misclassification_stats(misclassified_results)

Processing models in ../explanations/baseline:   0%|          | 0/4 [00:00<?, ?it/s]
Checking gpt-4o in reentrant:   0%|          | 0/41 [00:00<?, ?it/s][A
                                                                    [A
Checking gpt-4o in safe:   0%|          | 0/42 [00:00<?, ?it/s][A
                                                               [A
Checking o3-mini in reentrant:   0%|          | 0/39 [00:00<?, ?it/s][A
                                                                     [A
Checking o3-mini in safe:   0%|          | 0/41 [00:00<?, ?it/s][A
                                                                [A
Checking gpt-4o-mini in reentrant:   0%|          | 0/36 [00:00<?, ?it/s][A
                                                                         [A
Checking gpt-4o-mini in safe:   0%|          | 0/42 [00:00<?, ?it/s][A
                                                                    [A
Checking gpt-3.5-turbo in reentrant:   0%|          | 0/4

Misclassified contract IDs saved to logs/misclassified_contracts_baseline.json

Misclassification Statistics:
Model: gpt-4o
  Total Contracts: 83
  Reentrant: 41, Safe: 42
  Misclassified Reentrant: 0, Misclassified Safe: 11
  Misclassified Contracts: 11
Model: o3-mini
  Total Contracts: 80
  Reentrant: 39, Safe: 41
  Misclassified Reentrant: 4, Misclassified Safe: 2
  Misclassified Contracts: 6
Model: gpt-4o-mini
  Total Contracts: 78
  Reentrant: 36, Safe: 42
  Misclassified Reentrant: 1, Misclassified Safe: 11
  Misclassified Contracts: 12
Model: gpt-3.5-turbo
  Total Contracts: 81
  Reentrant: 41, Safe: 40
  Misclassified Reentrant: 5, Misclassified Safe: 4
  Misclassified Contracts: 9




# XRAG Model Comparison

In [93]:
def initialize_misclassified():
    """Build a fresh, zeroed statistics record for one model (XRAG layout).

    Returns:
        dict: the five integer counters (all 0), a
        ``misclassified_by_data_type`` defaultdict that creates zeroed
        ``misclassified``/``total`` counters per ground-truth label for any
        data type on first access, and an empty ``misclassified_contracts``
        list.
    """

    def _empty_data_type_counts():
        # One independent counter pair per ground-truth label.
        return {
            label: {"misclassified": 0, "total": 0}
            for label in ("reentrant", "safe")
        }

    record = {
        name: 0
        for name in ("total", "reentrant", "safe", "misclassified_reentrant", "misclassified_safe")
    }
    record["misclassified_by_data_type"] = defaultdict(_empty_data_type_counts)
    record["misclassified_contracts"] = []
    return record


def process_classification_file(classification_file, groundtruth, data_type, contract_id, misclassified_model):
    """Compare one XRAG ``classification.json`` against its ground truth.

    The file is expected to hold a JSON object with a ``classification``
    entry. The label is compared case-insensitively, ignoring surrounding
    whitespace. A missing, ``null`` or non-string label is treated as an
    empty prediction, so the contract is counted and recorded as
    misclassified instead of being dropped from every counter.

    Args:
        classification_file: path of the JSON file to read.
        groundtruth: expected label (``"reentrant"`` or ``"safe"``).
        data_type: name of the data-type folder the file belongs to.
        contract_id: identifier stored with a misclassified entry.
        misclassified_model: statistics record of the model, updated in
            place (global counters, per-data-type counters and the
            ``misclassified_contracts`` list).

    Returns:
        None. If the file cannot be read or parsed, the error is printed and
        no counter is touched.
    """
    try:
        with open(classification_file, 'r') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and decoding errors.
        print(f"Error processing {classification_file}: {e}")
        return

    raw_label = data.get("classification") if isinstance(data, dict) else None
    predicted_label = raw_label.strip().lower() if isinstance(raw_label, str) else ""

    per_label = misclassified_model["misclassified_by_data_type"][data_type][groundtruth]
    misclassified_model[groundtruth] += 1
    misclassified_model["total"] += 1
    per_label["total"] += 1

    if predicted_label != groundtruth:
        misclassified_model[f"misclassified_{groundtruth}"] += 1
        per_label["misclassified"] += 1
        misclassified_model["misclassified_contracts"].append({
            "contract_id": contract_id,
            "groundtruth": groundtruth,
            "predicted": predicted_label,
            "data_type": data_type
        })


def process_model(model, base_path, misclassified):
    """Collect XRAG statistics for one model folder.

    Walks ``<base_path>/<model>/<data_type>/<groundtruth>/<contract_id>``
    and forwards every existing ``classification.json`` to
    ``process_classification_file``. Entries that are not directories, and
    contracts without a classification file, are ignored.

    Args:
        model: name of the model folder.
        base_path: folder containing the model folders.
        misclassified: mapping of model name to statistics record; the entry
            for ``model`` is (re)initialised and then filled in place.
    """
    model_stats = initialize_misclassified()
    misclassified[model] = model_stats
    model_dir = os.path.join(base_path, model)

    for data_type in os.listdir(model_dir):
        data_type_dir = os.path.join(model_dir, data_type)
        if not os.path.isdir(data_type_dir):
            continue

        for label in ('reentrant', 'safe'):
            label_dir = os.path.join(data_type_dir, label)
            if os.path.isdir(label_dir):
                for contract_id in os.listdir(label_dir):
                    candidate = os.path.join(label_dir, contract_id, "classification.json")
                    if os.path.isfile(candidate):
                        process_classification_file(candidate, label, data_type, contract_id, model_stats)


def load_classification_results(base_path):
    """Gather XRAG misclassification statistics for every model folder.

    Args:
        base_path: folder whose sub-directories are the models to process.

    Returns:
        defaultdict: model name mapped to its statistics record.
    """
    results = defaultdict(initialize_misclassified)

    model_names = (
        entry for entry in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, entry))
    )
    for model_name in model_names:
        process_model(model_name, base_path, results)

    return results


def save_misclassified_results(misclassified, output_file=None):
    """Write the misclassification record to a JSON file.

    Args:
        misclassified: mapping of model name to statistics record.
        output_file: destination path. Defaults to
            ``MISCLASSIFIED_XRAG_FILE`` when omitted, which is the previous
            hard-coded behaviour.
    """
    target = MISCLASSIFIED_XRAG_FILE if output_file is None else output_file
    with open(target, 'w') as f:
        json.dump(misclassified, f, indent=4)
    print(f"Misclassified contract IDs saved to {target}")


def print_misclassification_stats(misclassified):
    """Print a per-model summary including the per-data-type breakdown.

    Args:
        misclassified: mapping of model name to statistics record, as built
            by the XRAG ``load_classification_results``.
    """
    print("\nMisclassification Statistics:")
    for model, stats in misclassified.items():
        misclassified_count = len(stats['misclassified_contracts'])
        report = [
            f"Model: {model}",
            f"  Total Contracts: {stats['total']}",
            f"  Reentrant: {stats['reentrant']}, Safe: {stats['safe']}",
            f"  Misclassified Reentrant: {stats['misclassified_reentrant']}, "
            f"Misclassified Safe: {stats['misclassified_safe']}",
            f"  Misclassified Contracts: {misclassified_count}",
            "  Misclassification by Data Type and Groundtruth:",
        ]
        for data_type, per_label in stats["misclassified_by_data_type"].items():
            report.append(f"    Data Type: {data_type}")
            report.extend(
                f"      {label}: {counts['misclassified']} misclassified out of {counts['total']}"
                for label, counts in per_label.items()
            )
        print("\n".join(report))


In [94]:
# Scan the XRAG outputs, persist the per-model record and print a summary.
# NOTE(review): this rebinds misclassified_results, which held the baseline record above.
misclassified_results = load_classification_results(XRAG_FOLDER)
save_misclassified_results(misclassified_results)
print_misclassification_stats(misclassified_results)

Misclassified contract IDs saved to logs/misclassified_contracts_xrag.json

Misclassification Statistics:
Model: gpt-4o
  Total Contracts: 265
  Reentrant: 126, Safe: 139
  Misclassified Reentrant: 3, Misclassified Safe: 36
  Misclassified Contracts: 39
  Misclassification by Data Type and Groundtruth:
    Data Type: ast_cfg
      reentrant: 1 misclassified out of 44
      safe: 15 misclassified out of 50
    Data Type: cfg
      reentrant: 1 misclassified out of 41
      safe: 7 misclassified out of 47
    Data Type: ast
      reentrant: 1 misclassified out of 41
      safe: 14 misclassified out of 42
Model: o3-mini
  Total Contracts: 265
  Reentrant: 126, Safe: 139
  Misclassified Reentrant: 19, Misclassified Safe: 6
  Misclassified Contracts: 25
  Misclassification by Data Type and Groundtruth:
    Data Type: ast_cfg
      reentrant: 7 misclassified out of 44
      safe: 2 misclassified out of 50
    Data Type: cfg
      reentrant: 5 misclassified out of 41
      safe: 2 misclassifi

# Intersection and Union of Misclassified Contracts

In [95]:
def load_misclassified_data(file_path):
    """Read a misclassification JSON file and report how many models it holds.

    Args:
        file_path: path of the JSON file to read.

    Returns:
        The parsed JSON content; its length is printed as the model count.
    """
    print(f"Loading misclassified data from {file_path}...")
    with open(file_path, 'r') as handle:
        models = json.load(handle)
    model_count = len(models)
    print(f"Loaded {model_count} models from {file_path}.")
    return models


def extract_misclassified_contracts(model_data, groundtruth, data_type=None):
    """Collect the IDs of misclassified contracts matching the given filters.

    Args:
        model_data: statistics record of one model; its
            ``misclassified_contracts`` list is read (missing means empty).
        groundtruth: only entries with this ground-truth label are kept.
        data_type: when given, only entries with this data type are kept.

    Returns:
        set: the ``contract_id`` of every matching entry.
    """
    selected = set()
    for entry in model_data.get("misclassified_contracts", []):
        if entry["groundtruth"] != groundtruth:
            continue
        if data_type is not None and entry["data_type"] != data_type:
            continue
        selected.add(entry["contract_id"])
    return selected


def compute_union_intersection(baseline_data, comparison_data):
    """Compare baseline and XRAG misclassifications per model and data type.

    For every model present in either input and every data type of that
    model in ``comparison_data``, the result stores, per ground-truth label:

    * ``union``: IDs misclassified by the baseline or the comparison run,
    * ``intersection``: IDs misclassified by both,
    * ``improvement``: baseline misclassification count minus the
      comparison count for that data type.

    Data types are taken from the comparison record's
    ``misclassified_by_data_type`` keys as well as from its misclassified
    entries, so a data type with zero misclassifications is still reported.
    Models, data types and ID lists are sorted so the saved JSON is
    reproducible.

    Args:
        baseline_data: model name mapped to the baseline statistics record.
        comparison_data: model name mapped to the XRAG statistics record.

    Returns:
        dict: ``{model: {"union": ..., "intersection": ..., "improvement": ...}}``
        where each inner mapping is keyed by data type, then by label.
    """
    print("Computing union and intersection of misclassified contracts...")
    labels = ("reentrant", "safe")
    results = {}

    for model in sorted(baseline_data.keys() | comparison_data.keys()):
        print(f"Processing model: {model}")
        # Default factories produce lists rather than sets so every value stays JSON-serialisable.
        results[model] = {
            "union": defaultdict(lambda: {"reentrant": [], "safe": []}),
            "intersection": defaultdict(lambda: {"reentrant": [], "safe": []}),
            "improvement": defaultdict(lambda: {"reentrant": 0, "safe": 0})
        }

        baseline_model = baseline_data.get(model, {})
        comparison_model = comparison_data.get(model, {})
        baseline_ids = {
            label: extract_misclassified_contracts(baseline_model, label) for label in labels
        }

        # Include data types that were evaluated but produced no misclassification.
        data_types = set(comparison_model.get("misclassified_by_data_type", {}))
        data_types.update(
            contract["data_type"] for contract in comparison_model.get("misclassified_contracts", [])
        )

        for data_type in sorted(data_types):
            print(f"  Processing data type: {data_type}")
            for label in labels:
                comparison_ids = extract_misclassified_contracts(comparison_model, label, data_type)

                results[model]["union"][data_type][label] = sorted(baseline_ids[label] | comparison_ids)
                results[model]["intersection"][data_type][label] = sorted(baseline_ids[label] & comparison_ids)
                results[model]["improvement"][data_type][label] = len(baseline_ids[label]) - len(comparison_ids)

    print("Completed computation of union, intersection, and improvement.")
    return results


def print_stats(results):
    """Print union, intersection and improvement figures per model and data type.

    Args:
        results: mapping produced by ``compute_union_intersection``; the
            data types listed are those present under each model's
            ``union`` entry.
    """
    print("\nMisclassification Union, Intersection, and Improvement Statistics:")
    for model, stats in results.items():
        print(f"Model: {model}")
        for data_type, union_ids in stats["union"].items():
            shared_ids = stats['intersection'][data_type]
            gain = stats['improvement'][data_type]
            lines = (
                f"  Data Type: {data_type}",
                f"    Union Reentrant: {len(union_ids['reentrant'])}, Union Safe: {len(union_ids['safe'])}",
                f"    Intersection Reentrant: {len(shared_ids['reentrant'])}, "
                f"Intersection Safe: {len(shared_ids['safe'])}",
                f"    Improvement Reentrant: {gain['reentrant']}, Improvement Safe: {gain['safe']}",
            )
            print("\n".join(lines))


def save_results(results):
    """Write the union/intersection/improvement data to ``MISCLASSIFIED_UI_FILE``.

    Args:
        results: JSON-serialisable mapping to store (indented by 4 spaces).
    """
    destination = MISCLASSIFIED_UI_FILE
    print(f"Saving results to {destination}...")
    with open(destination, 'w') as handle:
        json.dump(results, handle, indent=4)
    print(f"Results successfully saved to {destination}.")

In [96]:
# Reload both saved records from disk, compare them, then save and print the comparison.
print("Starting misclassification analysis...")
baseline_data = load_misclassified_data(MISCLASSIFIED_BASELINE_FILE)
comparison_data = load_misclassified_data(MISCLASSIFIED_XRAG_FILE)
union_intersection_results = compute_union_intersection(baseline_data, comparison_data)
save_results(union_intersection_results)
print_stats(union_intersection_results)
print("Analysis complete.")

Starting misclassification analysis...
Loading misclassified data from logs/misclassified_contracts_baseline.json...
Loaded 4 models from logs/misclassified_contracts_baseline.json.
Loading misclassified data from logs/misclassified_contracts_xrag.json...
Loaded 5 models from logs/misclassified_contracts_xrag.json.
Computing union and intersection of misclassified contracts...
Processing model: gpt-4o-mini
  Processing data type: ast
  Processing data type: cfg
  Processing data type: ast_cfg
Processing model: o3-mini
  Processing data type: ast
  Processing data type: cfg
  Processing data type: ast_cfg
Processing model: o3-mini-final
  Processing data type: ast
  Processing data type: cfg
  Processing data type: ast_cfg
Processing model: gpt-4o
  Processing data type: ast
  Processing data type: cfg
  Processing data type: ast_cfg
Processing model: gpt-3.5-turbo
  Processing data type: ast
  Processing data type: cfg
  Processing data type: ast_cfg
Completed computation of union, int

In [97]:
def load_json_file(file_path, default_message="Error loading file"):
    """Read a JSON file, falling back to a default value on any failure.

    Args:
        file_path: path of the JSON file (read as UTF-8).
        default_message: value returned when the file does not exist or
            cannot be read or parsed.

    Returns:
        The parsed JSON content, or ``default_message``. Read and parse
        errors are printed rather than raised.
    """
    if not os.path.exists(file_path):
        return default_message

    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
    except Exception as err:
        print(f"Error reading {file_path}: {err}")
        return default_message
    return parsed


def load_misclassified_data(file_path=None):
    """Load misclassified contracts from a JSON file.

    This definition replaces an earlier ``load_misclassified_data(file_path)``
    in the notebook; accepting an optional path keeps calls written for that
    earlier signature working after this cell has run.

    Args:
        file_path: JSON file to read. Defaults to ``MISCLASSIFIED_UI_FILE``.

    Returns:
        The parsed JSON content, or an empty dict when the file is missing
        or cannot be read.
    """
    if file_path is None:
        file_path = MISCLASSIFIED_UI_FILE
    print(f"Loading misclassified contracts from {file_path}...")
    return load_json_file(file_path, {})


def load_explanation(folder, *path_parts):
    """Load the ``explanation`` entry of a JSON file.

    Args:
        folder: base folder.
        *path_parts: path components joined under ``folder``.

    Returns:
        The value stored under ``explanation``, or ``"No explanation found"``
        when the file is missing, unreadable, not a JSON object, or has no
        such entry.
    """
    explanation_file = os.path.join(folder, *path_parts)
    # Use a None sentinel: the helper's default fallback is a string, which has no .get().
    payload = load_json_file(explanation_file, None)
    if not isinstance(payload, dict):
        return "No explanation found"
    return payload.get("explanation", "No explanation found")


def load_baseline_explanation(model, groundtruth_label, contract_id):
    """Fetch the baseline explanation of a contract.

    The file read is ``<BASELINE_FOLDER>/<model>/<groundtruth_label>/<contract_id>.sol.json``.
    """
    file_name = f"{contract_id}.sol.json"
    return load_explanation(BASELINE_FOLDER, model, groundtruth_label, file_name)


def load_comparison_explanation(model, data_type, groundtruth_label, contract_id):
    """Fetch the XRAG explanation of a contract for one data type.

    The file read is
    ``<XRAG_FOLDER>/<model>/<data_type>/<groundtruth_label>/<contract_id>/classification.json``.
    """
    relative_parts = (model, data_type, groundtruth_label, contract_id, "classification.json")
    return load_explanation(XRAG_FOLDER, *relative_parts)


def load_contract_source(groundtruth_label, contract_id):
    """Read a contract's source text from the dataset folder.

    The file read is ``<DATASET_SOURCE_FOLDER>/<groundtruth_label>/<contract_id>.sol``,
    decoded as latin1.

    Returns:
        str: the file content, ``"Source code not found"`` when the file
        does not exist, or ``"Error loading source code"`` when reading
        fails (the error is printed).
    """
    source_path = os.path.join(DATASET_SOURCE_FOLDER, groundtruth_label, f"{contract_id}.sol")
    if not os.path.exists(source_path):
        return "Source code not found"

    try:
        with open(source_path, "r", encoding="latin1") as handle:
            return handle.read()
    except Exception as err:
        print(f"Error reading source code for {contract_id}: {err}")
        return "Error loading source code"


def save_json(output_dir, filename, data):
    """Serialise ``data`` as indented JSON into ``output_dir/filename``.

    Args:
        output_dir: destination folder; created (with parents) if missing.
        filename: name of the file to write inside ``output_dir``.
        data: JSON-serialisable object, written with a 4-space indent.
    """
    os.makedirs(output_dir, exist_ok=True)
    destination = os.path.join(output_dir, filename)
    with open(destination, "w") as handle:
        json.dump(data, handle, indent=4)


def process_results(misclassified_data):
    """Write one JSON record per contract misclassified by both runs.

    For every model, data type and ground-truth label found under the
    model's ``intersection`` entry, a record holding the contract ID, the
    ground truth, both explanations and the contract source is saved to
    ``<MISCLASSIFICATION_ANALYSIS_FOLDER>/<model>/<data_type>/<groundtruth>/<contract_id>.json``.

    Args:
        misclassified_data: mapping produced by
            ``compute_union_intersection`` (as reloaded from JSON).
    """
    print("Processing misclassified contracts...")
    os.makedirs(MISCLASSIFICATION_ANALYSIS_FOLDER, exist_ok=True)

    for model, data_types in misclassified_data.items():
        print(f"Processing model: {model}")
        for data_type, groundtruths in data_types.get("intersection", {}).items():
            for groundtruth_label, contract_ids in groundtruths.items():
                # The destination only depends on the three outer loop variables.
                output_dir = os.path.join(MISCLASSIFICATION_ANALYSIS_FOLDER, model, data_type, groundtruth_label)
                for contract_id in contract_ids:
                    contract_data = {
                        "contract_id": contract_id,
                        "groundtruth": groundtruth_label,
                        "model_classification": "misclassified",
                        "baseline_explanation": load_baseline_explanation(model, groundtruth_label, contract_id),
                        "comparison_explanation": load_comparison_explanation(model, data_type, groundtruth_label,
                                                                              contract_id),
                        "source_code": load_contract_source(groundtruth_label, contract_id)
                    }
                    save_json(output_dir, f"{contract_id}.json", contract_data)

    # Report the folder actually written to (the message previously named "results/").
    print(f"Processing completed. Data saved to {MISCLASSIFICATION_ANALYSIS_FOLDER}/.")

In [98]:
# Reload the saved union/intersection data and write one record per shared misclassification.
misclassified_data = load_misclassified_data()
process_results(misclassified_data)

Loading misclassified contracts from logs/misclassified_contracts_union_intersection.json...
Processing misclassified contracts...
Processing model: gpt-4o-mini
Processing model: o3-mini
Processing model: o3-mini-final
Processing model: gpt-4o
Processing model: gpt-3.5-turbo
Processing completed. Data saved to results/.


# Google Form Data

In [99]:
def _sorted_subdirectories(path):
    """Return the names of the directories directly under ``path``, sorted."""
    return sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )


def gather_evaluation_data():
    """Collect one evaluation row per saved misclassification record.

    Walks ``<MISCLASSIFICATION_ANALYSIS_FOLDER>/<model>/<data_type>/<groundtruth>/*.json``
    for the models listed in ``MODELS``. Non-directory entries at any level
    are skipped, and every level is visited in sorted order so the row
    order (and any numbering derived from it) is the same on every run.

    Returns:
        list[list]: rows of ``[contract_id, model, data_type, groundtruth,
        source_code, baseline_explanation, comparison_explanation]``;
        missing fields are replaced by ``"Unknown"`` / ``"N/A"``.
    """
    evaluation_rows = []

    for model in _sorted_subdirectories(MISCLASSIFICATION_ANALYSIS_FOLDER):
        if model not in MODELS:
            continue
        model_path = os.path.join(MISCLASSIFICATION_ANALYSIS_FOLDER, model)

        for data_type in _sorted_subdirectories(model_path):
            data_type_path = os.path.join(model_path, data_type)

            for groundtruth_label in _sorted_subdirectories(data_type_path):
                groundtruth_path = os.path.join(data_type_path, groundtruth_label)

                contract_files = sorted(f for f in os.listdir(groundtruth_path) if f.endswith(".json"))

                for contract_file in contract_files:
                    contract_path = os.path.join(groundtruth_path, contract_file)
                    with open(contract_path, "r") as f:
                        contract_data = json.load(f)

                    evaluation_rows.append([
                        contract_data.get("contract_id", "Unknown"),
                        model,
                        data_type,
                        groundtruth_label,
                        contract_data.get("source_code", "N/A"),
                        contract_data.get("baseline_explanation", "N/A"),
                        contract_data.get("comparison_explanation", "N/A")
                    ])
    return evaluation_rows


def save_to_csv(rows):
    """Write the evaluation rows, preceded by a header row, to ``CSV_OUTPUT_FILE``.

    Args:
        rows: iterable of rows, each matching the seven header columns.
    """
    print(f"Saving evaluation data to {CSV_OUTPUT_FILE}...")

    column_names = [
        "contract_id",
        "model",
        "data_type",
        "groundtruth",
        "source_code",
        "baseline_explanation",
        "xrag_explanation",
    ]

    with open(CSV_OUTPUT_FILE, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(column_names)
        for record in rows:
            out.writerow(record)

    print(f"Evaluation CSV saved to {CSV_OUTPUT_FILE}")

In [100]:
# Build the evaluation rows from the saved records and export them as CSV.
evaluation_data = gather_evaluation_data()
save_to_csv(evaluation_data)
print("Google Form evaluation file is ready!")

Saving evaluation data to logs/misclassified_form.csv...
Evaluation CSV saved to logs/misclassified_form.csv
Google Form evaluation file is ready!


In [101]:
def extract_contracts_from_csv():
    """Write each CSV row's source code to its own ``.sol`` file.

    Reads ``CSV_OUTPUT_FILE`` and, for every data row whose ``source_code``
    is not blank, writes it to
    ``<QUESTIONNAIRE_FOLDER>/question_<row number>_<contract_id>.sol``.
    Row numbers start at 1 and also advance for skipped rows.
    """
    os.makedirs(QUESTIONNAIRE_FOLDER, exist_ok=True)

    with open(CSV_OUTPUT_FILE, "r", encoding="utf-8") as csv_handle:
        records = csv.reader(csv_handle)
        header = next(records)  # first row holds the column names

        id_column = header.index("contract_id")
        code_column = header.index("source_code")

        question_number = 0
        for record in records:
            question_number += 1
            contract_id = record[id_column]
            source_code = record[code_column]

            if not source_code.strip():
                continue  # nothing to write for a blank source

            target = os.path.join(QUESTIONNAIRE_FOLDER, f"question_{question_number}_{contract_id}.sol")
            with open(target, "w", encoding="utf-8") as sol_file:
                sol_file.write(source_code)

    print(f"Extracted contract source codes saved in {QUESTIONNAIRE_FOLDER}/")

In [102]:
# Split the exported CSV into one .sol file per questionnaire entry.
extract_contracts_from_csv()

Extracted contract source codes saved in logs/misclassified_questionnaire/
