In [57]:
import csv
import json
import os

In [58]:
# Root of the baseline model outputs; read as
# <model>/<groundtruth_label>/<contract_id>.sol.json.
BASELINE_FOLDER = "../explanations/baseline"
# Root of the Solidity source files; read as <groundtruth_label>/<contract_id>.sol.
DATASET_FOLDER = "../dataset/manually-verified-test/source"
# Destination of the generated CSV files (one <model>.csv per model).
CSV_OUTPUT_FOLDER = "logs/baseline_form"

# Destination of the per-model folders holding the extracted .sol files.
QUESTIONNAIRE_FOLDER = "logs/baseline_questionnaire"

# Create the shared parent directory of both output folders up front.
os.makedirs("logs", exist_ok=True)

In [59]:
def extract_contract_data(model, groundtruth_label, contract_id):
    """Build the record for one contract from its baseline JSON and its source file.

    Args:
        model: Name of the model sub-folder inside BASELINE_FOLDER.
        groundtruth_label: Name of the label sub-folder (used under both
            BASELINE_FOLDER/<model> and DATASET_FOLDER).
        contract_id: File stem; ".sol.json" / ".sol" is appended to locate
            the baseline result and the source file respectively.

    Returns:
        dict with the keys "contract_id", "explanation", "groundtruth",
        "classification" and "source_code". Fields that cannot be loaded keep
        their placeholder values; read errors are printed, never raised.
    """
    json_path = os.path.join(BASELINE_FOLDER, model, groundtruth_label, f"{contract_id}.sol.json")
    sol_path = os.path.join(DATASET_FOLDER, groundtruth_label, f"{contract_id}.sol")

    # Start from placeholders so every record has the same keys.
    record = dict(
        contract_id=contract_id,
        explanation="No explanation found",
        groundtruth=groundtruth_label,
        classification="Unknown",
        source_code="Source code not found",
    )

    if os.path.exists(json_path):
        try:
            with open(json_path, "r", encoding="utf-8") as handle:
                payload = json.load(handle)
            record["explanation"] = payload.get("explanation", "No explanation found")
            # The classification is lower-cased, including the "Unknown" fallback.
            label = payload.get("classification", "Unknown")
            record["classification"] = label.lower()
        except Exception as err:
            print(f"Error reading {json_path}: {err}")

    if os.path.exists(sol_path):
        try:
            # latin1 maps every byte to a character, so decoding cannot fail.
            with open(sol_path, "r", encoding="latin1") as handle:
                record["source_code"] = handle.read()
        except Exception as err:
            print(f"Error reading {sol_path}: {err}")

    return record


def explore_baseline_and_save_csv():
    """Walk BASELINE_FOLDER and write one CSV of contract details per model.

    The folder is expected to be laid out as
    <model>/<groundtruth_label>/<contract_id>.sol.json. For every model
    directory that yields at least one contract, a file <model>.csv is written
    to CSV_OUTPUT_FOLDER with the columns contract_id, explanation,
    groundtruth, classification and source_code (one row per contract, built
    by extract_contract_data).

    Models, labels and contract ids are processed in sorted order:
    os.listdir returns entries in arbitrary order, and the row order of the
    CSV determines the question numbers assigned by extract_contracts_from_csv,
    so sorting keeps the output reproducible across runs and machines.
    """
    print("Exploring baseline folder and extracting contract details...")
    os.makedirs(CSV_OUTPUT_FOLDER, exist_ok=True)

    suffix = ".sol.json"
    headers = ["contract_id", "explanation", "groundtruth", "classification", "source_code"]

    for model in sorted(os.listdir(BASELINE_FOLDER)):
        model_path = os.path.join(BASELINE_FOLDER, model)
        if not os.path.isdir(model_path):
            continue

        rows = []
        for groundtruth_label in sorted(os.listdir(model_path)):
            groundtruth_path = os.path.join(model_path, groundtruth_label)
            if not os.path.isdir(groundtruth_path):
                continue

            # Strip only the trailing suffix; str.replace would also remove
            # any earlier occurrence of ".sol.json" inside the file name.
            contract_ids = sorted(
                name[:-len(suffix)]
                for name in os.listdir(groundtruth_path)
                if name.endswith(suffix)
            )

            for contract_id in contract_ids:
                rows.append(extract_contract_data(model, groundtruth_label, contract_id))

        # Models without any contract produce no CSV file.
        if not rows:
            continue

        csv_file_path = os.path.join(CSV_OUTPUT_FOLDER, f"{model}.csv")
        with open(csv_file_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=headers)
            writer.writeheader()
            writer.writerows(rows)

        print(f"Baseline contract data for model '{model}' saved to {csv_file_path}")


In [60]:
# Generate the per-model CSV files under CSV_OUTPUT_FOLDER.
# NOTE(review): the recorded output of this cell reports paths under
# "logs/baseline/", which does not match the current CSV_OUTPUT_FOLDER value
# ("logs/baseline_form") -- the output looks stale; re-run to refresh it.
explore_baseline_and_save_csv()

Exploring baseline folder and extracting contract details...
Baseline contract data for model 'gpt-4o' saved to logs/baseline/gpt-4o.csv
Baseline contract data for model 'o3-mini' saved to logs/baseline/o3-mini.csv
Baseline contract data for model 'gpt-4o-mini' saved to logs/baseline/gpt-4o-mini.csv
Baseline contract data for model 'gpt-3.5-turbo' saved to logs/baseline/gpt-3.5-turbo.csv


In [61]:
def extract_contracts_from_csv():
    """Split every baseline CSV into one Solidity file per contract, grouped by model.

    For each "<model>.csv" file in CSV_OUTPUT_FOLDER, a folder
    QUESTIONNAIRE_FOLDER/<model> is created and every data row whose
    "source_code" column is not blank is written to
    "question_<row number>_<contract_id>.sol" (row numbers start at 1 and
    count all data rows, including skipped ones).

    Entries that are not regular ".csv" files are ignored, and a CSV without
    a header row is reported and skipped.

    Raises:
        ValueError: if a CSV header lacks the "contract_id" or "source_code" column.
    """
    os.makedirs(QUESTIONNAIRE_FOLDER, exist_ok=True)

    # A whole contract is stored in a single field, which can exceed the csv
    # module's default 131072-character limit. 2**31 - 1 fits a C long on
    # every platform (sys.maxsize would overflow on Windows).
    wanted_limit = 2 ** 31 - 1
    if csv.field_size_limit() < wanted_limit:
        csv.field_size_limit(wanted_limit)

    for csv_file in sorted(os.listdir(CSV_OUTPUT_FOLDER)):
        csv_path = os.path.join(CSV_OUTPUT_FOLDER, csv_file)

        # splitext drops only the final extension, so dotted model names such
        # as "gpt-3.5-turbo" survive (split(".")[0] would yield "gpt-3").
        model, extension = os.path.splitext(csv_file)
        if extension.lower() != ".csv" or not os.path.isfile(csv_path):
            continue

        # newline="" lets the csv module handle line endings itself, as the
        # csv documentation requires for files passed to csv.reader.
        with open(csv_path, "r", newline="", encoding="utf-8") as f:
            reader = csv.reader(f)
            headers = next(reader, None)  # Read the header row
            if headers is None:
                print(f"Skipping empty CSV file: {csv_path}")
                continue

            contract_id_index = headers.index("contract_id")
            source_code_index = headers.index("source_code")

            model_output_folder = os.path.join(QUESTIONNAIRE_FOLDER, model)
            os.makedirs(model_output_folder, exist_ok=True)

            for index, row in enumerate(reader, start=1):
                if not row:  # csv.reader yields [] for blank lines
                    continue

                contract_id = row[contract_id_index]
                source_code = row[source_code_index]

                if source_code.strip():  # Skip empty source code
                    filename = os.path.join(model_output_folder, f"question_{index}_{contract_id}.sol")
                    with open(filename, "w", encoding="latin1") as contract_file:
                        contract_file.write(source_code)

    print(f"Extracted contract source codes saved in {QUESTIONNAIRE_FOLDER}/")

In [62]:
# Write the source code stored in each CSV row to its own .sol file under
# QUESTIONNAIRE_FOLDER (requires the CSV files produced by the previous step).
extract_contracts_from_csv()

Extracted contract source codes saved in logs/baseline_questionnaire/
