In [2]:
import json
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Constants
N, M, K, F = 362, 9, 1000, 4
class_labels = [
    "initial", "SCALE", "PFS", "PFS'", "FTA",
    "VPSC", "PRISM", "GTREE", "RWordle-L"
]

# ---------------- Load Data ----------------
with open("merge.json", "r") as f:
    sets_data = json.load(f)

scores_df = pd.read_csv("scores.csv")

# Initialize tensors
X_tensor = np.zeros((N, M, K, F), dtype=np.float32)
Y_tensor = np.zeros((N, M), dtype=np.float32)

# ---------------- Data Processing Functions ----------------
def is_valid_box(box):
    """Check if the box has the correct format [x, y, width, height]"""
    return isinstance(box, list) and len(box) == F and all(isinstance(x, (int, float)) for x in box)

def normalize_box(box):
    """Normalize box coordinates to [0, 1] range"""
    # Adjust these denominators based on your actual data range
    return [
        box[0] / 100.0,    # x position
        box[1] / 100.0,    # y position
        box[2] / 50.0,     # width
        box[3] / 50.0      # height
    ]

def process_data(sets_data, scores_df):
    """Process and normalize all data into tensors"""
    # Create mapping from filename to index
    filename_to_idx = {entry['base_name']: idx for idx, entry in enumerate(sets_data)}
    
    # Process each entry in the dataset
    for entry in sets_data:
        base_name = entry['base_name']
        idx = filename_to_idx.get(base_name, -1)
        if idx == -1:
            continue  # Skip if base_name not found
        
        boxes = entry.get('boxes', [])
        
        # Get scores for this file
        file_scores = scores_df[scores_df['filename'] == base_name]
        
        # Process each algorithm's boxes and scores
        for algo_idx, algo in enumerate(class_labels):
            # Get score for this algorithm
            score_row = file_scores[file_scores['algorithm'] == algo]
            if not score_row.empty:
                Y_tensor[idx, algo_idx] = score_row['score'].values[0]
            
            # Get boxes for this algorithm - handle cases where boxes might not match class_labels
            if isinstance(boxes, list) and algo_idx < len(boxes):
                algo_boxes = boxes[algo_idx]
                
                # Handle case where algo_boxes might be a single box or list of boxes
                if is_valid_box(algo_boxes):
                    # Single box case
                    X_tensor[idx, algo_idx, 0] = normalize_box(algo_boxes)
                elif isinstance(algo_boxes, list):
                    # Multiple boxes case
                    for box_idx, box in enumerate(algo_boxes[:K]):  # Truncate to K boxes
                        if is_valid_box(box):
                            X_tensor[idx, algo_idx, box_idx] = normalize_box(box)
    
    return X_tensor, Y_tensor

# ---------------- Main Processing ----------------
try:
    X_tensor, Y_tensor = process_data(sets_data, scores_df)
    print(f"X_tensor shape: {X_tensor.shape}")  # Should be (362, 9, 1000, 4)
    print(f"Y_tensor shape: {Y_tensor.shape}")  # Should be (362, 9)
    
    # Optional: Save tensors
    np.save("X_tensor.npy", X_tensor)
    np.save("Y_tensor.npy", Y_tensor)
    
except Exception as e:
    print(f"Error processing data: {str(e)}")

X_tensor shape: (362, 9, 1000, 4)
Y_tensor shape: (362, 9)


In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Load the saved tensors
X_tensor = np.load("X_tensor.npy")  # Shape: (N, M, K, F)
Y_tensor = np.load("Y_tensor.npy")  # Shape: (N, M)

# Dataset Class
class BoxDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor, Y_tensor, test_size=0.2, random_state=42
)

train_dataset = BoxDataset(X_train, y_train)
test_dataset = BoxDataset(X_test, y_test)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Fixed Neural Network Architecture with proper reshaping
class BoxPredictor(nn.Module):
    def __init__(self, M, K, F):
        super(BoxPredictor, self).__init__()
        self.M = M  # Number of algorithms (9)
        self.K = K  # Number of boxes (1000)
        self.F = F  # Features per box (4)
        
        # Box feature processor
        self.box_encoder = nn.Sequential(
            nn.Linear(F, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU()
        )
        
        # Algorithm-level processor
        self.algorithm_processor = nn.Sequential(
            nn.Linear(128 * K, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU()
        )
        
        # Final score predictor
        self.score_predictor = nn.Linear(128, 1)
        
    def forward(self, x):
        # x shape: (batch_size, M, K, F)
        batch_size = x.shape[0]
        
        # Process all algorithms in parallel
        # Reshape to (batch_size*M, K, F)
        x = x.reshape(-1, self.K, self.F)
        
        # Process each box: (batch_size*M*K, F) -> (batch_size*M*K, 128)
        encoded_boxes = self.box_encoder(x.reshape(-1, self.F))
        
        # Reshape back to (batch_size*M, K*128)
        encoded_boxes = encoded_boxes.reshape(batch_size * self.M, -1)
        
        # Process all boxes for each algorithm: (batch_size*M, 128)
        algo_features = self.algorithm_processor(encoded_boxes)
        
        # Predict scores: (batch_size*M, 1)
        scores = self.score_predictor(algo_features)
        
        # Reshape to final output: (batch_size, M)
        return scores.reshape(batch_size, self.M)

# Initialize model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BoxPredictor(M=9, K=1000, F=4).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

# Training Loop with gradient clipping
def train_model(model, train_loader, test_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            
            train_loss += loss.item() * X_batch.size(0)
        
        # Validation
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                test_loss += criterion(outputs, y_batch).item() * X_batch.size(0)
        
        # Print statistics
        train_loss = train_loss / len(train_loader.dataset)
        test_loss = test_loss / len(test_loader.dataset)
        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f} - Test Loss: {test_loss:.4f}")

# Train the model
train_model(model, train_loader, test_loader, epochs=20)

# Save the trained model
torch.save(model.state_dict(), "box_predictor_model.pth")

Epoch 1/20 - Train Loss: 0.9578 - Test Loss: 1.0406
Epoch 2/20 - Train Loss: 0.5229 - Test Loss: 0.0845
Epoch 3/20 - Train Loss: 0.3238 - Test Loss: 0.0982
Epoch 4/20 - Train Loss: 0.0916 - Test Loss: 0.0852
Epoch 5/20 - Train Loss: 0.0877 - Test Loss: 0.0843
Epoch 6/20 - Train Loss: 0.0848 - Test Loss: 0.0861
Epoch 7/20 - Train Loss: 0.0850 - Test Loss: 0.0882
Epoch 8/20 - Train Loss: 0.0861 - Test Loss: 0.0870
Epoch 9/20 - Train Loss: 0.0863 - Test Loss: 0.0914
Epoch 10/20 - Train Loss: 0.0892 - Test Loss: 0.0876
Epoch 11/20 - Train Loss: 0.0854 - Test Loss: 0.0922
Epoch 12/20 - Train Loss: 0.0861 - Test Loss: 0.0836
Epoch 13/20 - Train Loss: 0.0833 - Test Loss: 0.0830
Epoch 14/20 - Train Loss: 0.0831 - Test Loss: 0.0825
Epoch 15/20 - Train Loss: 0.0829 - Test Loss: 0.0861
Epoch 16/20 - Train Loss: 0.0841 - Test Loss: 0.0824
Epoch 17/20 - Train Loss: 0.0828 - Test Loss: 0.0826
Epoch 18/20 - Train Loss: 0.0825 - Test Loss: 0.0819
Epoch 19/20 - Train Loss: 0.0822 - Test Loss: 0.0811
Ep

In [7]:
import json
import os
import numpy as np
from collections import defaultdict

# Configuration
class_labels = [
    "initial", "SCALE", "PFS", "PFS'", "FTA",
    "VPSC", "PRISM", "GTREE", "RWordle-L"
]
input_dir = "C:/Users/Informatics/Documents/ML4Vis/Data/random_50_16/res"
output_file = "random_50_16_merged_data.json"

# Step 1: Merge all JSON files into structured format
def merge_json_files(input_dir, class_labels):
    merged_data = []
    
    # Group files by base name (e.g., "random_50_1")
    file_groups = defaultdict(dict)
    
    for filename in os.listdir(input_dir):
        if filename.endswith(".json"):
            # Parse base name and algorithm
            if filename.startswith("output_Diamond.json"):
                base_name = filename.replace("output_Diamond.json", "").strip("_")
                algorithm = "initial"
            else:
                parts = filename.replace("output_", "").replace(".json", "").split("_")
                base_name = "_".join(parts[:-1])
                algorithm = parts[-1]
            
            # Load JSON data
            with open(os.path.join(input_dir, filename), 'r') as f:
                data = json.load(f)
            
            # Store in groups
            if base_name not in file_groups:
                file_groups[base_name] = {"base_name": base_name, "boxes": []}
            
            # Ensure boxes are ordered by class_labels
            if algorithm in class_labels:
                idx = class_labels.index(algorithm)
                # Pad with empty lists if necessary
                while len(file_groups[base_name]["boxes"]) <= idx:
                    file_groups[base_name]["boxes"].append([])
                file_groups[base_name]["boxes"][idx] = data["boxes"]
    
    # Convert to list and ensure proper ordering
    for base_name in sorted(file_groups.keys()):
        entry = file_groups[base_name]
        # Fill any missing algorithms with empty boxes
        while len(entry["boxes"]) < len(class_labels):
            entry["boxes"].append([])
        merged_data.append(entry)
    
    return merged_data

# Step 2: Save merged data
merged_data = merge_json_files(input_dir, class_labels)
with open(output_file, 'w') as f:
    json.dump(merged_data, f, indent=2)

print(f"Merged data saved to {output_file}")

Merged data saved to random_50_16_merged_data.json


In [8]:
# Step 3: Prepare for neural network prediction
def prepare_for_prediction(merged_file, class_labels):
    with open(merged_file, 'r') as f:
        data = json.load(f)
    
    N = len(data)
    M = len(class_labels)
    K = 1000  # Maximum number of boxes to consider
    F = 4     # Box features
    
    X_tensor = np.zeros((N, M, K, F), dtype=np.float32)
    
    for i, entry in enumerate(data):
        for j, boxes in enumerate(entry["boxes"]):
            # Take first K boxes if available
            num_boxes = min(len(boxes), K)
            if num_boxes > 0:
                X_tensor[i, j, :num_boxes] = boxes[:num_boxes]
    
    return X_tensor

# Load the trained model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BoxPredictor(M=len(class_labels), K=1000, F=4).to(device)
model.load_state_dict(torch.load("box_predictor_model.pth"))
model.eval()

# Step 4: Predict best algorithm
def predict_best_algorithm(merged_file, model, class_labels):
    X_tensor = prepare_for_prediction(merged_file, class_labels)
    X_tensor = torch.tensor(X_tensor, dtype=torch.float32).to(device)
    
    with torch.no_grad():
        predictions = model(X_tensor)
    
    results = []
    for i in range(len(X_tensor)):
        scores = predictions[i].cpu().numpy()
        best_idx = np.argmax(scores)
        results.append({
            "base_name": "unix",
            "predicted_scores": {class_labels[j]: float(scores[j]) for j in range(len(class_labels))},
            "best_algorithm": class_labels[best_idx],
            "best_score": float(scores[best_idx])
        })
    
    return results
output_file="unix_merged_data.json"
# Make predictions
predictions = predict_best_algorithm(output_file, model, class_labels)

# Save predictions
with open("algorithm_predictionsunix.json", 'w') as f:
    json.dump(predictions, f, indent=2)

print("Predictions saved to algorithm_predictions.json")

Predictions saved to algorithm_predictions.json


In [12]:
import json
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib import cm
from matplotlib.colors import to_rgba

# ---------- Load Data ----------
def load_json_file(filepath):
    with open(filepath, 'r') as f:
        content = f.read().strip()
        try:
            data = json.loads(content)
            if isinstance(data, str):
                data = json.loads(data)
            return data
        except Exception as e:
            raise ValueError(f"Error loading {filepath}: {e}")

# Load box data and predictions
merged_entry = load_json_file("merged_data.json")
predictions = load_json_file("algorithm_predictions.json")

# Make prediction lookup by base_name
prediction_dict = {
    p["base_name"]: p for p in predictions if isinstance(p, dict) and "base_name" in p
}

# ---------- Visualization Setup ----------
class_labels = [
    "initial", "SCALE", "PFS", "PFS'", "FTA",
    "VPSC", "PRISM", "GTREE", "RWordle-L"
]
colors = cm.get_cmap('tab10', len(class_labels))
algorithm_colors = {algo: to_rgba(colors(i)) for i, algo in enumerate(class_labels)}

def plot_boxes(entry, prediction, figsize=(36, 5)):
    base_name = entry["base_name"]
    best_algo = prediction["best_algorithm"]

    fig, axes = plt.subplots(1, 9, figsize=figsize)
    fig.suptitle(f"Box Visualization - {base_name}\nBest Algorithm: {best_algo} (Score: {prediction['best_score']:.3f})",
                 fontsize=16, y=1.15)

    for i, (algo, ax) in enumerate(zip(class_labels, axes)):
        boxes = entry["boxes"][i] if i < len(entry["boxes"]) else []

        facecolor = list(algorithm_colors[algo])
        facecolor[3] = 0.3  # transparency

        valid_boxes = 0
        all_x = []
        all_y = []

        for box in boxes:
            if isinstance(box, list) and len(box) == 4:
                try:
                    x, y, w, h = [float(coord) for coord in box]
                    all_x.extend([x, x + w])
                    all_y.extend([y, y + h])

                    rect = patches.Rectangle((x, y), w, h, linewidth=1,
                                             edgecolor=algorithm_colors[algo],
                                             facecolor=facecolor)
                    ax.add_patch(rect)
                    valid_boxes += 1
                except Exception as e:
                    print(f"Invalid box in {base_name}/{algo}: {box} - {str(e)}")

        if valid_boxes > 0:
            min_x, max_x = min(all_x), max(all_x)
            min_y, max_y = min(all_y), max(all_y)
            pad_x = (max_x - min_x) * 0.05
            pad_y = (max_y - min_y) * 0.05
            ax.set_xlim(min_x - pad_x, max_x + pad_x)
            ax.set_ylim(min_y - pad_y, max_y + pad_y)
        else:
            ax.set_xlim(0, 100)
            ax.set_ylim(0, 100)

        ax.set_title(f"{algo}\nScore: {prediction.get('predicted_scores', {}).get(algo, 0):.2f}\nBoxes: {valid_boxes}",
                     fontsize=9)
        ax.set_aspect('equal')
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.grid(True)

        if algo == best_algo:
            for spine in ax.spines.values():
                spine.set_edgecolor('red')
                spine.set_linewidth(3)

    plt.tight_layout()
    return fig

# ---------- Main Execution ----------
base_name = merged_entry.get("base_name", "unknown")
prediction = prediction_dict.get(base_name)

if prediction is None:
    print(f"No prediction found for base_name: {base_name}")
else:
    fig = plot_boxes(merged_entry, prediction)
    output_file = f"box_visualization_{base_name}_row_with_axes.png"
    fig.savefig(output_file, bbox_inches="tight", dpi=150)
    plt.close(fig)
    print(f"Saved to {output_file}")


  colors = cm.get_cmap('tab10', len(class_labels))


Saved to box_visualization_random_50_16_row_with_axes.png


In [11]:
import json
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib import cm
from matplotlib.colors import to_rgba

# ---------- Load Data ----------
def load_json_file(filepath):
    with open(filepath, 'r') as f:
        content = f.read().strip()
        try:
            data = json.loads(content)
            if isinstance(data, str):
                data = json.loads(data)
            return data
        except Exception as e:
            raise ValueError(f"Error loading {filepath}: {e}")

# Load box data and predictions
merged_entries = load_json_file("b124_merged_data.json")  # Now plural since it's a list
predictions = load_json_file("algorithm_predictionsb124.json")

# Make prediction lookup by base_name
prediction_dict = {
    p["base_name"]: p for p in predictions if isinstance(p, dict) and "base_name" in p
}

# ---------- Visualization Setup ----------
class_labels = [
    "initial", "SCALE", "PFS", "PFS'", "FTA",
    "VPSC", "PRISM", "GTREE", "RWordle-L"
]
colors = cm.get_cmap('tab10', len(class_labels))
algorithm_colors = {algo: to_rgba(colors(i)) for i, algo in enumerate(class_labels)}

def plot_boxes(entry, prediction, figsize=(36, 5)):
    base_name = entry["base_name"]
    best_algo = prediction["best_algorithm"]

    fig, axes = plt.subplots(1, 9, figsize=figsize)
    fig.suptitle(f"Box Visualization - {base_name}\nBest Algorithm: {best_algo}",
                fontsize=16, y=1.15)

    for i, (algo, ax) in enumerate(zip(class_labels, axes)):
        boxes = entry["boxes"][i] if i < len(entry["boxes"]) else []

        facecolor = list(algorithm_colors[algo])
        facecolor[3] = 0.3  # transparency

        valid_boxes = 0
        all_x = []
        all_y = []

        for box in boxes:
            if isinstance(box, list) and len(box) == 4:
                try:
                    x, y, w, h = [float(coord) for coord in box]
                    all_x.extend([x, x + w])
                    all_y.extend([y, y + h])

                    rect = patches.Rectangle((x, y), w, h, linewidth=1,
                                           edgecolor=algorithm_colors[algo],
                                           facecolor=facecolor)
                    ax.add_patch(rect)
                    valid_boxes += 1
                except Exception as e:
                    print(f"Invalid box in {base_name}/{algo}: {box} - {str(e)}")

        if valid_boxes > 0:
            min_x, max_x = min(all_x), max(all_x)
            min_y, max_y = min(all_y), max(all_y)
            pad_x = (max_x - min_x) * 0.05
            pad_y = (max_y - min_y) * 0.05
            ax.set_xlim(min_x - pad_x, max_x + pad_x)
            ax.set_ylim(min_y - pad_y, max_y + pad_y)
        else:
            ax.set_xlim(0, 100)
            ax.set_ylim(0, 100)

        ax.set_title(f"{algo}\nScore: {prediction.get('predicted_scores', {}).get(algo, 0):.2f}\nBoxes: {valid_boxes}",
                    fontsize=9)
        ax.set_aspect('equal')
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.grid(True)

        if algo == best_algo:
            for spine in ax.spines.values():
                spine.set_edgecolor('red')
                spine.set_linewidth(3)

    plt.tight_layout()
    return fig

# ---------- Main Execution ----------
# Process each entry in merged_entries (which is a list)
for merged_entry in merged_entries:
    base_name = merged_entry.get("base_name", "unknown")
    prediction = prediction_dict.get(base_name)

    if prediction is None:
        print(f"No prediction found for base_name: {base_name}")
    else:
        fig = plot_boxes(merged_entry, prediction)
        output_file = f"results/{base_name}_row_with_axes.png"
        fig.savefig(output_file, bbox_inches="tight", dpi=150)
        plt.close(fig)
        print(f"Saved to {output_file}")

  colors = cm.get_cmap('tab10', len(class_labels))


Saved to results/b124_row_with_axes.png


In [24]:
import json
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import os

# Setup
os.makedirs("results", exist_ok=True)
plt.style.use('default')
plt.rcParams.update({
    'font.size': 12,
    'axes.facecolor': 'white',
    'figure.facecolor': 'white',
    'savefig.facecolor': 'white'
})

# Dataset and algorithm configuration
DATASETS = [
    {"name": "b124", "data_file": "b124_merged_data.json", "pred_file": "algorithm_predictionsb124.json"},
    {"name": "rowe", "data_file": "rowe_merged_data.json", "pred_file": "algorithm_predictionsR.json"},
    {"name": "random_50_16", "data_file": "random_50_16_merged_data.json", "pred_file": "random_50_16algorithm_predictions.json"}
]

ALGORITHMS = ["initial", "SCALE", "PFS", "PFS'", "FTA", "VPSC", "PRISM", "GTREE", "RWordle-L"]
COLORS = plt.cm.tab10(np.linspace(0, 1, len(ALGORITHMS)))

def load_json_file(filepath):
    try:
        with open(filepath, 'r') as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading {filepath}: {str(e)}")
        return None

# Load and prepare data
matrix_data = []
for dataset in DATASETS:
    entries = load_json_file(dataset["data_file"])
    preds = load_json_file(dataset["pred_file"])
    
    if entries and preds:
        entries = entries if isinstance(entries, list) else [entries]
        preds = preds if isinstance(preds, list) else [preds]
        
        pred_map = {p["base_name"].lower(): p for p in preds if isinstance(p, dict)}
        for entry in entries:
            base_name = entry.get("base_name", "").lower()
            if base_name in pred_map:
                matrix_data.append({
                    "name": dataset["name"],
                    "entry": entry,
                    "prediction": pred_map[base_name]
                })

if not matrix_data:
    print("No valid data to plot!")
    exit()

# Calculate maximum dimensions across all algorithms
max_width = 0
max_height = 0
for data in matrix_data:
    for boxes in data["entry"]["boxes"]:
        for box in boxes:
            if isinstance(box, list) and len(box) == 4:
                max_width = max(max_width, box[2])
                max_height = max(max_height, box[3])

# Create figure with grid layout
fig = plt.figure(figsize=(20, 3 * len(matrix_data)))
grid = plt.GridSpec(
    len(matrix_data) + 1,
    len(ALGORITHMS) + 1,
    wspace=0.1,
    hspace=0.2,
    width_ratios=[0.5] + [1]*len(ALGORITHMS),
    height_ratios=[0.3] + [1]*len(matrix_data)
)

# Create header row
for col_idx, (algo, color) in enumerate(zip(ALGORITHMS, COLORS), 1):
    ax = plt.subplot(grid[0, col_idx])
    ax.set_facecolor('white')
    ax.text(0.5, 0.5, algo, 
            ha='center', va='center',
            fontsize=12, fontweight='bold',
            bbox=dict(facecolor=color, alpha=0.3))
    ax.axis('off')

# Create row labels
for row_idx, data in enumerate(matrix_data, 1):
    ax = plt.subplot(grid[row_idx, 0])
    ax.set_facecolor('white')
    ax.text(0.5, 0.5, data["name"],
            ha='center', va='center',
            fontsize=12, fontweight='bold',
            rotation=90)
    ax.axis('off')

# Plot data cells with consistent scaling
for row_idx, data in enumerate(matrix_data, 1):
    entry = data["entry"]
    pred = data["prediction"]
    
    for col_idx, (algo, color) in enumerate(zip(ALGORITHMS, COLORS), 1):
        ax = plt.subplot(grid[row_idx, col_idx])
        ax.set_facecolor('white')
        boxes = entry["boxes"][col_idx-1] if (col_idx-1) < len(entry["boxes"]) else []
        
        # Calculate cell-specific limits
        x_min = y_min = float('inf')
        x_max = y_max = -float('inf')
        
        for box in boxes:
            if isinstance(box, list) and len(box) == 4:
                x, y, w, h = box
                x_min = min(x_min, x)
                y_min = min(y_min, y)
                x_max = max(x_max, x + w)
                y_max = max(y_max, y + h)
        
        # Apply padding if boxes exist, else use defaults
        if x_max > -float('inf'):
            pad_x = (x_max - x_min) * 0.1
            pad_y = (y_max - y_min) * 0.1
            xlim = [x_min - pad_x, x_max + pad_x]
            ylim = [y_min - pad_y, y_max + pad_y]
        else:
            xlim = [0, max_width * 1.2]
            ylim = [0, max_height * 1.2]
        
        # Plot boxes with normalized sizes
        for box in boxes:
            if isinstance(box, list) and len(box) == 4:
                x, y, w, h = box
                # Normalize position while maintaining size
                norm_x = x_min + (x - x_min) * (xlim[1] - xlim[0]) / (x_max - x_min) if x_max > x_min else x
                norm_y = y_min + (y - y_min) * (ylim[1] - ylim[0]) / (y_max - y_min) if y_max > y_min else y
                
                rect = patches.Rectangle(
                    (norm_x, norm_y), w, h,
                    linewidth=0.8,
                    edgecolor=color,
                    facecolor=color,
                    alpha=0.6
                )
                ax.add_patch(rect)
        
        # Highlight best algorithm
        if algo == pred["best_algorithm"]:
            for spine in ax.spines.values():
                spine.set_edgecolor('red')
                spine.set_linewidth(3)
        
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(True, linestyle=':', alpha=0.3)
        
        # Add max dimensions text
        if boxes:
            ax.text(0.95, 0.95, 
                   f"Max X: {x_max:.1f}\nMax Y: {y_max:.1f}",
                   ha='right', va='top',
                   transform=ax.transAxes,
                   bbox=dict(facecolor='white', alpha=0.7))

plt.tight_layout()
output_file = "results/algorithm_matrix_normalized.png"
fig.savefig(output_file, bbox_inches="tight", dpi=150)
plt.close(fig)
print(f"Created normalized matrix visualization: {output_file}")

  plt.tight_layout()


Created normalized matrix visualization: results/algorithm_matrix_normalized.png


In [26]:
import json
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import os

# Setup
os.makedirs("results", exist_ok=True)
plt.style.use('default')
plt.rcParams.update({
    'font.size': 10,
    'axes.facecolor': 'white',
    'figure.facecolor': 'white',
    'savefig.facecolor': 'white'
})

# Configuration
DATASETS = [
    {"name": "rowe", "data_file": "rowe_merged_data.json", "pred_file": "algorithm_predictionsR.json"},
    {"name": "random_50_16", "data_file": "random_50_16_merged_data.json", "pred_file": "random_50_16algorithm_predictions.json"},
    {"name": "b124", "data_file": "b124_merged_data.json", "pred_file": "algorithm_predictionsb124.json"}
    
]

ALGORITHMS = ["initial", "SCALE", "PFS", "PFS'", "FTA", "VPSC", "PRISM", "GTREE", "RWordle-L"]
COLORS = plt.cm.tab10(np.linspace(0, 1, len(ALGORITHMS)))

def load_json_file(filepath):
    try:
        with open(filepath, 'r') as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading {filepath}: {str(e)}")
        return None

# Load data
matrix_data = []
for dataset in DATASETS:
    entries = load_json_file(dataset["data_file"])
    preds = load_json_file(dataset["pred_file"])
    
    if entries and preds:
        entries = entries if isinstance(entries, list) else [entries]
        preds = preds if isinstance(preds, list) else [preds]
        
        pred_map = {p["base_name"].lower(): p for p in preds if isinstance(p, dict)}
        for entry in entries:
            base_name = entry.get("base_name", "").lower()
            if base_name in pred_map:
                matrix_data.append({
                    "name": dataset["name"],
                    "entry": entry,
                    "prediction": pred_map[base_name]
                })

if not matrix_data:
    print("No valid data to plot!")
    exit()

# Create figure with additional row for axis values
fig = plt.figure(figsize=(22, 3.5 * len(matrix_data)))
grid = plt.GridSpec(
    len(matrix_data) * 2 + 1,  # Data rows + axis rows + header
    len(ALGORITHMS) + 1,
    wspace=0.1,
    hspace=0.05,
    width_ratios=[0.5] + [1]*len(ALGORITHMS),
    height_ratios=[0.3] + [1, 0.2]*len(matrix_data)  # Alternating data and axis rows
)

# Header row
for col_idx, (algo, color) in enumerate(zip(ALGORITHMS, COLORS), 1):
    ax = plt.subplot(grid[0, col_idx])
    ax.set_facecolor('white')
    ax.text(0.5, 0.5, algo, 
            ha='center', va='center',
            fontsize=12, fontweight='bold',
            bbox=dict(facecolor=color, alpha=0.3))
    ax.axis('off')

# Data and axis rows
for row_idx, data in enumerate(matrix_data):
    entry = data["entry"]
    pred = data["prediction"]
    
    # Dataset label
    label_ax = plt.subplot(grid[row_idx*2+1, 0])
    label_ax.set_facecolor('white')
    label_ax.text(0.5, 0.5, data["name"],
                 ha='center', va='center',
                 fontsize=12, fontweight='bold',
                 rotation=90)
    label_ax.axis('off')
    
    # Plot data cells
    for col_idx, (algo, color) in enumerate(zip(ALGORITHMS, COLORS), 1):
        # Main plot area
        ax = plt.subplot(grid[row_idx*2+1, col_idx])
        ax.set_facecolor('white')
        boxes = entry["boxes"][col_idx-1] if (col_idx-1) < len(entry["boxes"]) else []
        
        # Calculate limits
        x_vals = []
        y_vals = []
        for box in boxes:
            if isinstance(box, list) and len(box) == 4:
                x, y, w, h = box
                x_vals.extend([x, x+w])
                y_vals.extend([y, y+h])
        
        if x_vals:
            x_pad = (max(x_vals) - min(x_vals)) * 0.1
            y_pad = (max(y_vals) - min(y_vals)) * 0.1
            xlim = [min(x_vals)-x_pad, max(x_vals)+x_pad]
            ylim = [min(y_vals)-y_pad, max(y_vals)+y_pad]
        else:
            xlim = [0, 100]
            ylim = [0, 100]
        
        # Plot boxes
        for box in boxes:
            if isinstance(box, list) and len(box) == 4:
                x, y, w, h = box
                rect = patches.Rectangle(
                    (x, y), w, h,
                    linewidth=0.8,
                    edgecolor=color,
                    facecolor=color,
                    alpha=0.6
                )
                ax.add_patch(rect)
        
        # Highlight best algorithm
        if algo == pred["best_algorithm"]:
            for spine in ax.spines.values():
                spine.set_edgecolor('red')
                spine.set_linewidth(3)
        
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(True, linestyle=':', alpha=0.3)
        
        # Axis values display
        if row_idx < len(matrix_data):  # Only create once per row
            axis_ax = plt.subplot(grid[row_idx*2+2, col_idx])
            axis_ax.set_facecolor('white')
            
            if x_vals:
                axis_ax.text(0.5, 0.7, f"X: {min(x_vals):.1f}-{max(x_vals):.1f}", 
                            ha='center', va='center', fontsize=9)
                axis_ax.text(0.5, 0.3, f"Y: {min(y_vals):.1f}-{max(y_vals):.1f}", 
                            ha='center', va='center', fontsize=9)
            else:
                axis_ax.text(0.5, 0.5, "No data", ha='center', va='center', fontsize=9)
            
            axis_ax.axis('off')

plt.tight_layout()
output_file = "results/algorithm_matrix_with_axis_values.png"
fig.savefig(output_file, bbox_inches="tight", dpi=150)
plt.close(fig)
print(f"Created matrix visualization with axis values: {output_file}")

  plt.tight_layout()


Created matrix visualization with axis values: results/algorithm_matrix_with_axis_values.png
