In [1]:
import json

def restructure_json(input_file, output_file, algorithm_order=None):
    """Rewrite a layout JSON so every entry carries one ordered ``layouts`` list.

    The input file must hold a JSON list of objects. Each object needs a
    ``"base_name"`` key and may have one key per algorithm. The output file
    receives a list of ``{"base_name": ..., "layouts": [...]}`` objects where
    ``layouts[i]`` is the value stored under ``algorithm_order[i]`` in the
    source entry, or ``[]`` when the entry has no such key.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write. Missing parent
            directories are created before writing.
        algorithm_order: Optional sequence of algorithm keys fixing the order
            of ``layouts``. When omitted, the nine-algorithm order hard-coded
            below is used.

    Returns:
        None. The result is written to ``output_file`` and a confirmation
        line is printed.

    Raises:
        KeyError: If an entry has no ``"base_name"`` key.
    """
    import os  # local import: the cell that defines this function only imports json

    if algorithm_order is None:
        # Default fixed order of algorithms
        algorithm_order = ["initial", "SCALE", "PFS", "PFS'", "FTA", "VPSC", "PRISM", "GTREE", "RWordle-L"]

    # Load the original JSON data (JSON text is UTF-8, independent of the OS locale)
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # One output entry per input entry; a missing algorithm becomes an empty
    # list so that positions in "layouts" always line up with algorithm_order.
    restructured_data = [
        {
            "base_name": entry["base_name"],
            "layouts": [entry.get(algo, []) for algo in algorithm_order],
        }
        for entry in data
    ]

    # Make sure the destination folder exists; open(..., 'w') does not create it.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the restructured data
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(restructured_data, f, indent=2)

    print(f"Successfully restructured JSON and saved to {output_file}")

# Run the restructuring step on the merged layout file.
# NOTE(review): the output folder is spelled "rescruction" here, while a later
# cell sets OUTPUT_DIR = "restruction" -- confirm which spelling is intended.
input_json, output_json = (
    "final_merged_with_initial.json",
    "rescruction/restructured_layouts1.json",
)
restructure_json(input_file=input_json, output_file=output_json)

Successfully restructured JSON and saved to rescruction/restructured_layouts1.json


In [4]:
import json
import pandas as pd
from collections import defaultdict
import os

# Configuration: the two input files and the folder that receives this cell's outputs
INPUT_JSON = "final_merged_with_initial.json"
INPUT_CSV = "output_scoresBYAZENoP.csv"
OUTPUT_DIR = "restruction"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Algorithm name mappings (JSON name : CSV name)
# Insertion order matters: the steps below iterate this dict to decide the
# order of the merged boxes and of the emitted score rows.
ALGORITHM_MAPPING = {
    "initial": "initial",
    "SCALE": "scale",
    "PFS": "pfs",
    "PFS'": "PFS'",
    "FTA": "fta",
    "VPSC": "vpsc",
    "PRISM": "prism",
    "GTREE": "gtree",
    "RWordle-L": "RWordle-L"
}

# Load data files
print("🔍 Loading data files...")
with open(INPUT_JSON, "r", encoding="utf-8") as f:
    layout_data = json.load(f)

try:
    gqs_scores = pd.read_csv(INPUT_CSV)
except Exception as e:
    # Report the failure, then re-raise so the cell stops with the real
    # traceback. exit() is an interactive-shell helper; in a notebook it asks
    # the kernel to shut down instead of surfacing the error.
    print(f"❌ Error loading CSV: {e}")
    raise
else:
    print(f"Loaded CSV with {len(gqs_scores)} rows")

# 1. Verify filename matching
# NOTE(review): this comparison is case-sensitive, whereas the score lookup
# further down lowercases both sides -- the count printed here can therefore
# be smaller than the number of graphs that actually receive a score.
print("\n🔎 Verifying filename matching...")

json_files = set()
for layout_entry in layout_data:
    json_files.add(layout_entry["base_name"])

csv_files = set(gqs_scores["filename"].unique())

# Names that occur on both sides
common_files = json_files.intersection(csv_files)
print(f"Common files: {len(common_files)}")

# 2. Create merge.json
print("\n🛠 Creating merge.json...")
algorithms = list(ALGORITHM_MAPPING)  # JSON-side algorithm names, in mapping order

# For every graph, concatenate the items of each algorithm that is present
# (in mapping order) into a single flat "boxes" list; absent algorithms
# contribute nothing.
output_data = [
    {
        "base_name": graph["base_name"],
        "boxes": [
            box
            for name in algorithms
            if name in graph
            for box in graph[name]
        ],
    }
    for graph in layout_data
]

merge_path = f"{OUTPUT_DIR}/merge.json"
with open(merge_path, "w") as out_file:
    json.dump(output_data, out_file, indent=2)

# 3. Create scores.csv with proper score matching
print("\n💯 Creating scores.csv...")

# Build the case-insensitive (filename, algorithm) -> score lookup once,
# instead of lowercasing both CSV columns and scanning the whole frame for
# every graph/algorithm pair. setdefault keeps the first CSV row seen for a
# key, so duplicated rows resolve to the earliest one.
score_lookup = {}
for csv_name, csv_algorithm, csv_score in zip(
    gqs_scores["filename"].str.lower(),
    gqs_scores["algorithm"].str.lower(),
    gqs_scores["score"],
):
    score_lookup.setdefault((csv_name, csv_algorithm), csv_score)

scores_data = []

for entry in layout_data:
    base_name = entry["base_name"]
    base_key = base_name.lower()

    for json_algo, csv_algo in ALGORITHM_MAPPING.items():
        if json_algo == "initial":
            # The initial layout is never looked up in the CSV; it is pinned to 0.0
            score = 0.0
        else:
            # Pairs that are absent from the CSV fall back to 0.0
            score = score_lookup.get((base_key, csv_algo.lower()), 0.0)

        scores_data.append({
            "filename": base_name,
            "algorithm": json_algo,  # Use JSON algorithm name in output
            "score": float(score)
        })

# Save scores
scores_df = pd.DataFrame(scores_data)
scores_df.to_csv(f"{OUTPUT_DIR}/scoresB2.csv", index=False)

# Summarise what was written: row count, how many scores are strictly
# positive (printed under the "Non-zero" label), and per-algorithm statistics.
print("\n📊 Score Summary:")
print(f"Total entries: {len(scores_data)}")

positive_mask = scores_df["score"] > 0
print(f"Non-zero scores: {int(positive_mask.sum())}")

print("Score distribution by algorithm:")
per_algorithm_stats = scores_df.groupby("algorithm")["score"].describe()
print(per_algorithm_stats)

print(f"\n✅ Done! Files created in {OUTPUT_DIR}")

🔍 Loading data files...
Loaded CSV with 6720 rows

🔎 Verifying filename matching...
Common files: 362

🛠 Creating merge.json...

💯 Creating scores.csv...

📊 Score Summary:
Total entries: 3258
Non-zero scores: 2896
Score distribution by algorithm:
           count      mean       std    min    25%    50%      75%    max
algorithm                                                                
FTA        362.0  0.703569  0.146990  0.081  0.661  0.789  0.81300  0.813
GTREE      362.0  0.674914  0.117943  0.393  0.563  0.697  0.79475  0.813
PFS        362.0  0.668561  0.167271  0.294  0.537  0.758  0.81300  0.813
PFS'       362.0  0.714834  0.116863  0.408  0.619  0.777  0.81300  0.813
PRISM      362.0  0.684475  0.121630  0.374  0.575  0.716  0.80625  0.813
RWordle-L  362.0  0.749204  0.064635  0.410  0.716  0.767  0.81300  0.813
SCALE      362.0  0.680514  0.121549  0.481  0.547  0.704  0.81300  0.813
VPSC       362.0  0.740994  0.106512  0.171  0.716  0.799  0.81300  0.813
initial    36

In [1]:
import json
import pandas as pd
from collections import defaultdict

def compare_csv_json(csv_path, json_path):
    """Compare the file names listed in a CSV with those listed in a JSON file.

    The CSV is read with pandas and its ``filename`` column supplies one set
    of names; the JSON file must hold a list of objects whose ``base_name``
    values supply the other. A summary (and, when non-empty, the names found
    on only one side) is printed.

    Args:
        csv_path: Path of the CSV file; it must have a ``filename`` column.
        json_path: Path of the JSON file; every entry needs a ``base_name`` key.

    Returns:
        dict with three sorted lists: ``common_files`` (names on both sides),
        ``csv_only`` and ``json_only``.
    """
    def _print_section(title, names):
        # Print a titled bullet list; nothing at all is printed for an empty set.
        if not names:
            return
        print(title)
        for name in sorted(names):
            print(f"- {name}")

    # Names from the CSV side
    score_table = pd.read_csv(csv_path)
    csv_files = set(score_table['filename'].unique())

    # Names from the JSON side
    with open(json_path, 'r') as handle:
        layout_entries = json.load(handle)
    json_files = {item['base_name'] for item in layout_entries}

    # Overlap and the two one-sided differences
    shared = csv_files.intersection(json_files)
    csv_exclusive = csv_files.difference(json_files)
    json_exclusive = json_files.difference(csv_files)

    print("\n📋 File Comparison Results:")
    print(f"Total files in CSV: {len(csv_files)}")
    print(f"Total files in JSON: {len(json_files)}")
    print(f"Files present in both: {len(shared)}")

    _print_section("\n🚨 Files only in CSV:", csv_exclusive)
    _print_section("\n🚨 Files only in JSON:", json_exclusive)

    return {
        'common_files': sorted(shared),
        'csv_only': sorted(csv_exclusive),
        'json_only': sorted(json_exclusive),
    }

# Entry point: compare the score CSV with the layout JSON and save the overlap
if __name__ == "__main__":
    json_path = "final_merged_with_initial.json"  # Replace with your JSON path
    csv_path = "gqs_scores_per_algorithm.csv"  # Replace with your CSV path

    results = compare_csv_json(csv_path=csv_path, json_path=json_path)

    # The report is one header line followed by the common names, one per
    # line, with no trailing newline after the last name.
    report_header = "Files present in both CSV and JSON:\n"
    report_body = "\n".join(results['common_files'])
    with open("common_files.txt", "w") as report:
        report.writelines([report_header, report_body])

    print("\n✅ Comparison complete. Common files saved to 'common_files.txt'")


📋 File Comparison Results:
Total files in CSV: 840
Total files in JSON: 362
Files present in both: 362

🚨 Files only in CSV:
- pa_1000_1
- pa_1000_10
- pa_1000_11
- pa_1000_12
- pa_1000_13
- pa_1000_14
- pa_1000_15
- pa_1000_16
- pa_1000_17
- pa_1000_18
- pa_1000_19
- pa_1000_2
- pa_1000_20
- pa_1000_21
- pa_1000_22
- pa_1000_23
- pa_1000_24
- pa_1000_25
- pa_1000_26
- pa_1000_27
- pa_1000_28
- pa_1000_29
- pa_1000_3
- pa_1000_30
- pa_1000_4
- pa_1000_5
- pa_1000_6
- pa_1000_7
- pa_1000_8
- pa_1000_9
- pa_100_1
- pa_100_10
- pa_100_11
- pa_100_12
- pa_100_13
- pa_100_14
- pa_100_15
- pa_100_16
- pa_100_17
- pa_100_18
- pa_100_19
- pa_100_2
- pa_100_20
- pa_100_21
- pa_100_22
- pa_100_23
- pa_100_24
- pa_100_25
- pa_100_26
- pa_100_27
- pa_100_28
- pa_100_29
- pa_100_3
- pa_100_30
- pa_100_4
- pa_100_5
- pa_100_6
- pa_100_7
- pa_100_8
- pa_100_9
- pa_200_1
- pa_200_10
- pa_200_11
- pa_200_12
- pa_200_13
- pa_200_14
- pa_200_15
- pa_200_16
- pa_200_17
- pa_200_18
- pa_200_19
- pa_200_2
