In this notebook, we combine the per-task simulation results into single tensors in a memory-efficient way (one variable and one task file at a time).

In [1]:
import torch
import os
import json
import numpy as np
import argparse

In [2]:
# Location of the simulation output that will be combined.
folder_loc = '/vast/sr6364/perturbed_organics/data/adaptive_phase_diagram/delocalized'
# Alternative dataset (log-scale phase diagram); swap with the line below to use it.
# folder_name = 'delocalized_goe_symmetric_N_100_s_100_mu_0.0_num_delta_200_num_input_200_num_trials_100_b0_1.0_b1_1.0_phase_diagram_log-scale'
folder_name = 'delocalized_goe_symmetric_N_100_s_100_mu_0.0_num_delta_200_num_input_200_num_trials_100_b0_1.0_b1_1.0_phase_diagram_linear'
path = os.path.join(folder_loc, folder_name)

# Read the run parameters stored next to the results.
param_file = os.path.join(path, "parameters.json")
with open(param_file, "r") as f:
    params = json.load(f)

# Expose the parameters used by the cells below as module-level names.
# A missing key raises KeyError here rather than later in the notebook.
N, num_tasks, num_delta, num_input, num_trials, input_scale = (
    params[key]
    for key in ("N", "num_tasks", "num_delta", "num_input", "num_trials", "input_scale")
)

In [5]:
def combine_variable(var_name, shape, fill_value, dtype, path, num_tasks, mask_source=None):
    """
    Combine the task-specific files of one variable into a single tensor and save it.

    Parameters:
      - var_name: Name of the variable to combine (e.g., "spectral_radius").
      - shape: Full shape of the final combined tensor.
      - fill_value: The fill value for positions not updated by any task.
      - dtype: Data type for the combined tensor.
      - path: Directory containing the task files; the combined file is written here too.
      - num_tasks: Total number of task files (task ids 0 .. num_tasks - 1).
      - mask_source: If provided, the name of the task files to use as a mask;
                     otherwise the variable's own task files are used.
                     (For all variables here, we use the "condition" task files.)

    For each task i the function loads:
      - The mask from "<mask_source>_task_{i}.pt" (or "<var_name>_task_{i}.pt" if no mask_source).
      - The variable data from "<var_name>_task_{i}.pt".
    Positions where mask != -1 are copied from the task data into the combined tensor.
    The mask indexes the leading dimensions, so a variable may have extra trailing dimensions.

    The combined tensor is saved to "<var_name>.pt". Only AFTER the save has succeeded are the
    variable's task files deleted, so a failed save never loses data. Task files of the mask
    source itself (var_name == mask_source) are kept, because other variables still need them.

    Raises:
      - FileNotFoundError (from torch.load) if a required task file is missing.
    """
    combined = torch.full(shape, fill_value=fill_value, dtype=dtype)

    # The mask name is the same for every task, so resolve it once.
    mask_key = mask_source if mask_source is not None else var_name

    def task_file(name, task_id):
        return os.path.join(path, f"{name}_task_{task_id}.pt")

    for task_id in range(num_tasks):
        # Load onto the CPU: `combined` lives on the CPU, and files written by a GPU run
        # would otherwise come back as CUDA tensors.
        mask_tensor = torch.load(task_file(mask_key, task_id), map_location="cpu")
        indices = (mask_tensor != -1).nonzero(as_tuple=True)

        if mask_key == var_name:
            # The variable is its own mask; no need to read the same file twice.
            task_data = mask_tensor
        else:
            task_data = torch.load(task_file(var_name, task_id), map_location="cpu")

        # Update only the valid positions of this task.
        combined[indices] = task_data[indices]

    # Note: For "condition" we keep the int8 type without converting it to bool.

    # Save the combined tensor first ...
    out_file = os.path.join(path, f"{var_name}.pt")
    torch.save(combined, out_file)
    print(f"Combined and saved {var_name}.pt")

    # ... and only then delete the task files, unless they serve as the mask source.
    if mask_source is None or var_name != mask_source:
        for task_id in range(num_tasks):
            var_file = task_file(var_name, task_id)
            if os.path.exists(var_file):
                os.remove(var_file)

# Specification (shape, fill value, dtype, mask) of every variable to combine.
# The "condition" task files act as the mask for all variables, including "condition" itself.
grid_shape = (num_delta, num_input, num_trials)

var_specs = {
    # int8 mask variable; -1 marks positions that no task has filled.
    "condition": {
        "shape": grid_shape,
        "fill": -1,
        "dtype": torch.int8,
        "mask_source": "condition"
    },
    # One value per (delta, input, trial) grid point.
    "spectral_radius": {
        "shape": grid_shape,
        "fill": float("nan"),
        "dtype": torch.float16,
        "mask_source": "condition"
    },
}

# Variables that store a length-N vector at every grid point.
var_specs.update({
    vector_name: {
        "shape": grid_shape + (N,),
        "fill": float("nan"),
        "dtype": torch.float16,
        "mask_source": "condition"
    }
    for vector_name in (
        "norm_fixed_point_y",
        "norm_fixed_point_a",
        "actual_fixed_point_y",
        "actual_fixed_point_a",
        "first_order_perturb_y",
        "first_order_perturb_a",
    )
})

# Complex-valued entries, 2 * N per grid point.
var_specs["eigvals_J"] = {
    "shape": grid_shape + (2 * N,),
    "fill": float("nan"),
    "dtype": torch.complex64,
    "mask_source": "condition"
}

def _is_already_combined(var_name):
    """Return True if "<var_name>.pt" exists and none of the variable's task files remain."""
    if not os.path.exists(os.path.join(path, f"{var_name}.pt")):
        return False
    return not any(
        os.path.exists(os.path.join(path, f"{var_name}_task_{task_id}.pt"))
        for task_id in range(num_tasks)
    )


# Combine all variables except "condition" first; "condition" goes last because its
# task files are the mask every other variable needs.
combine_order = [name for name in var_specs if name != "condition"] + ["condition"]

for var_name in combine_order:
    # Make the cell safe to re-run after an interrupted run: a variable whose combined
    # file exists and whose task files are gone has already been processed, and trying
    # to combine it again would fail with FileNotFoundError.
    if _is_already_combined(var_name):
        print(f"Skipping {var_name}: {var_name}.pt already exists and its task files are gone")
        continue

    spec = var_specs[var_name]
    combine_variable(
        var_name,
        shape=spec["shape"],
        fill_value=spec["fill"],
        dtype=spec["dtype"],
        path=path,
        num_tasks=num_tasks,
        mask_source=spec["mask_source"]
    )

# Delete any remaining "condition" task files (combine_variable keeps the mask source's files).
for task_id in range(num_tasks):
    mask_file = os.path.join(path, f"condition_task_{task_id}.pt")
    if os.path.exists(mask_file):
        os.remove(mask_file)

print("Results saved successfully")

FileNotFoundError: [Errno 2] No such file or directory: '/vast/sr6364/perturbed_organics/data/adaptive_phase_diagram/delocalized/delocalized_goe_symmetric_N_100_s_100_mu_0.0_num_delta_200_num_input_200_num_trials_100_b0_1.0_b1_1.0_phase_diagram_linear/spectral_radius_task_0.pt'

In [None]:
## Code to combine results produced by the old code (the mask variable is "bool_stable" instead of "condition").

def combine_variable(var_name, shape, fill_value, dtype, path, num_tasks, mask_source=None):
    """
    Combine the task-specific files of one variable into a single tensor and save it.

    Parameters:
      - var_name: Name of the variable to combine (e.g., "spectral_radius").
      - shape: Full shape of the final combined tensor.
      - fill_value: The fill value for positions not updated by any task.
      - dtype: Data type for the combined tensor.
      - path: Directory containing the task files; the combined file is written here too.
      - num_tasks: Total number of task files (task ids 0 .. num_tasks - 1).
      - mask_source: If provided, the name of the task files to use as a mask;
                     otherwise the variable's own task files are used.
                     (For all variables here, we use the bool_stable task files.)

    For each task i the function loads:
      - The mask from "<mask_source>_task_{i}.pt" (or "<var_name>_task_{i}.pt" if no mask_source).
      - The variable data from "<var_name>_task_{i}.pt".
    Positions where mask != -1 are copied from the task data into the combined tensor.
    The mask indexes the leading dimensions, so a variable may have extra trailing dimensions.

    The combined tensor is saved to "<var_name>.pt"; for "bool_stable" it is converted to a
    boolean tensor first. Only AFTER the save has succeeded are the variable's task files
    deleted, so a failed save never loses data. Task files of the mask source itself
    (var_name == mask_source) are kept, because other variables still need them.

    Raises:
      - FileNotFoundError (from torch.load) if a required task file is missing.
    """
    combined = torch.full(shape, fill_value=fill_value, dtype=dtype)

    # The mask name is the same for every task, so resolve it once.
    mask_key = mask_source if mask_source is not None else var_name

    def task_file(name, task_id):
        return os.path.join(path, f"{name}_task_{task_id}.pt")

    # Loop over each task.
    for task_id in range(num_tasks):
        # Load onto the CPU: `combined` lives on the CPU, and files written by a GPU run
        # would otherwise come back as CUDA tensors.
        mask_tensor = torch.load(task_file(mask_key, task_id), map_location="cpu")
        indices = (mask_tensor != -1).nonzero(as_tuple=True)

        if mask_key == var_name:
            # The variable is its own mask; no need to read the same file twice.
            task_data = mask_tensor
        else:
            task_data = torch.load(task_file(var_name, task_id), map_location="cpu")

        # Update only the valid positions of this task.
        combined[indices] = task_data[indices]

    # For bool_stable, convert the final result to a boolean tensor.
    # NOTE(review): positions never filled by any task keep the fill value; with the -1 fill
    # used in this notebook they become True after .bool(). Confirm that the tasks cover the
    # whole grid, otherwise unfilled entries are counted as stable.
    if var_name == "bool_stable":
        combined = combined.bool()

    # Save the combined tensor first ...
    out_file = os.path.join(path, f"{var_name}.pt")
    torch.save(combined, out_file)
    print(f"Combined and saved {var_name}.pt")

    # ... and only then delete the task files, unless they serve as the mask source.
    # (For bool_stable, we want to keep its task files for later use as a mask.)
    if mask_source is None or var_name != mask_source:
        for task_id in range(num_tasks):
            var_file = task_file(var_name, task_id)
            if os.path.exists(var_file):
                os.remove(var_file)

# Specification (shape, fill value, dtype, mask) of every variable to combine.
# Note: the bool_stable task files act as the mask for all variables.
grid_shape = (num_delta, num_input, num_trials)

var_specs = {
    # int8 mask variable; -1 marks positions that no task has filled.
    "bool_stable": {
        "shape": grid_shape,
        "fill": -1,
        "dtype": torch.int8,
        "mask_source": "bool_stable"  # For bool_stable, use its own task files (and postpone deletion).
    },
    # One value per (delta, input, trial) grid point.
    "spectral_radius": {
        "shape": grid_shape,
        "fill": float("nan"),
        "dtype": torch.float16,
        "mask_source": "bool_stable"
    },
}

# Variables that store a length-N vector at every grid point.
var_specs.update({
    vector_name: {
        "shape": grid_shape + (N,),
        "fill": float("nan"),
        "dtype": torch.float16,
        "mask_source": "bool_stable"
    }
    for vector_name in (
        "norm_fixed_point_y",
        "norm_fixed_point_a",
        "actual_fixed_point_y",
        "actual_fixed_point_a",
        "first_order_perturb_y",
        "first_order_perturb_a",
    )
})

# Complex-valued entries, 2 * N per grid point.
var_specs["eigvals_J"] = {
    "shape": grid_shape + (2 * N,),
    "fill": float("nan"),
    "dtype": torch.complex64,
    "mask_source": "bool_stable"
}

def _is_already_combined(var_name):
    """Return True if "<var_name>.pt" exists and none of the variable's task files remain."""
    if not os.path.exists(os.path.join(path, f"{var_name}.pt")):
        return False
    return not any(
        os.path.exists(os.path.join(path, f"{var_name}_task_{task_id}.pt"))
        for task_id in range(num_tasks)
    )


# To ensure that the bool_stable task files are available for all other variables,
# combine all variables EXCEPT bool_stable first and bool_stable itself last.
# Its task files are used as the mask and are kept (i.e. not deleted) inside
# combine_variable since var_name == mask_source.
combine_order = [name for name in var_specs if name != "bool_stable"] + ["bool_stable"]

for var_name in combine_order:
    # Make the cell safe to re-run after an interrupted run: a variable whose combined
    # file exists and whose task files are gone has already been processed, and trying
    # to combine it again would fail with FileNotFoundError.
    if _is_already_combined(var_name):
        print(f"Skipping {var_name}: {var_name}.pt already exists and its task files are gone")
        continue

    spec = var_specs[var_name]
    combine_variable(
        var_name,
        shape=spec["shape"],
        fill_value=spec["fill"],
        dtype=spec["dtype"],
        path=path,
        num_tasks=num_tasks,
        mask_source=spec["mask_source"]
    )

# Finally, delete any remaining bool_stable task files.
for task_id in range(num_tasks):
    mask_file = os.path.join(path, f"bool_stable_task_{task_id}.pt")
    if os.path.exists(mask_file):
        os.remove(mask_file)

# Compute and save percent_stable: the mean of bool_stable over dim 2, times 100.
# NOTE(review): dim 2 is the num_trials axis of the bool_stable shape in var_specs.
# Entries that no task filled are True in the saved boolean tensor (the -1 fill
# converts to True), so they count as stable here; confirm full grid coverage.
bool_stable = torch.load(os.path.join(path, "bool_stable.pt"))
percent_stable = bool_stable.float().mean(dim=2) * 100
torch.save(percent_stable, os.path.join(path, "percent_stable.pt"))
print("Combined and saved percent_stable.pt")

print("Results saved successfully")
