In [2]:
import re
from collections import defaultdict

def get_loss_dict(file_path):
    # Dictionary to store results
    loss_dict = defaultdict(list)

    # Pattern to match model transition line
    transition_pattern = re.compile(r"(nonoverlappingCNN_\w+ -> fcnn_decreasing_\w+)")
    # Pattern to match final loss
    loss_pattern = re.compile(r"Final Loss:\s*([\d\.eE+-]+)")

    with open(file_path, "r") as f:
        current_key = None
        for line in f:
            # Check if the line is a model transition
            transition_match = transition_pattern.search(line)
            if transition_match:
                current_key = transition_match.group(1)
                continue

            # If inside a relevant section, check for Final Loss
            if current_key:
                loss_match = loss_pattern.search(line)
                if loss_match:
                    final_loss = float(loss_match.group(1))
                    loss_dict[current_key].append(final_loss)
                    current_key = None  # reset for next block

    for key, losses in loss_dict.items():
        print(len(losses), key)
        avg_loss = sum(losses) / len(losses)
        print(f"{key}: {losses} → Average Final Loss = {avg_loss:.4f}")

In [8]:
get_loss_dict("../experiment_output/experiment__02042025_l1_init0.2.txt")

5 nonoverlappingCNN_sigmoid -> fcnn_decreasing_sigmoid
nonoverlappingCNN_sigmoid -> fcnn_decreasing_sigmoid: [0.0027, 0.0028, 0.0028, 0.0027, 0.0028] → Average Final Loss = 0.0028
5 nonoverlappingCNN_sigmoid -> fcnn_decreasing_relu
nonoverlappingCNN_sigmoid -> fcnn_decreasing_relu: [0.0083, 0.0083, 0.0083, 0.0083, 0.0086] → Average Final Loss = 0.0084
5 nonoverlappingCNN_sigmoid -> fcnn_decreasing_tanh
nonoverlappingCNN_sigmoid -> fcnn_decreasing_tanh: [0.0313, 0.0308, 0.0304, 0.0304, 0.0304] → Average Final Loss = 0.0307
5 nonoverlappingCNN_relu -> fcnn_decreasing_sigmoid
nonoverlappingCNN_relu -> fcnn_decreasing_sigmoid: [3.3592, 3.2934, 3.4703, 3.4351, 3.2133] → Average Final Loss = 3.3543
5 nonoverlappingCNN_relu -> fcnn_decreasing_relu
nonoverlappingCNN_relu -> fcnn_decreasing_relu: [0.0055, 0.0059, 0.0061, 0.006, 0.006] → Average Final Loss = 0.0059
5 nonoverlappingCNN_relu -> fcnn_decreasing_tanh
nonoverlappingCNN_relu -> fcnn_decreasing_tanh: [3.4998, 3.4473, 3.6127, 3.5994, 3.

In [6]:
get_loss_dict("../experiment_output/experiment__01042025_init1.txt")

5 nonoverlappingCNN_sigmoid -> fcnn_decreasing_sigmoid
nonoverlappingCNN_sigmoid -> fcnn_decreasing_sigmoid: [0.0088, 0.0088, 0.0088, 0.0087, 0.0088] → Average Final Loss = 0.0088
5 nonoverlappingCNN_sigmoid -> fcnn_decreasing_relu
nonoverlappingCNN_sigmoid -> fcnn_decreasing_relu: [0.2751, 0.2741, 0.2747, 0.276, 0.2769] → Average Final Loss = 0.2754
5 nonoverlappingCNN_sigmoid -> fcnn_decreasing_tanh
nonoverlappingCNN_sigmoid -> fcnn_decreasing_tanh: [0.0263, 0.026, 0.0259, 0.0262, 0.0264] → Average Final Loss = 0.0262
5 nonoverlappingCNN_relu -> fcnn_decreasing_sigmoid
nonoverlappingCNN_relu -> fcnn_decreasing_sigmoid: [3.3977, 3.1759, 3.4394, 3.3148, 3.2189] → Average Final Loss = 3.3093
5 nonoverlappingCNN_relu -> fcnn_decreasing_relu
nonoverlappingCNN_relu -> fcnn_decreasing_relu: [5.0859, 5.0544, 4.7707, 4.9846, 4.8574] → Average Final Loss = 4.9506
5 nonoverlappingCNN_relu -> fcnn_decreasing_tanh
nonoverlappingCNN_relu -> fcnn_decreasing_tanh: [3.5291, 3.4968, 3.3229, 3.2398, 3.

In [10]:
get_loss_dict("../experiment_output/experiment__02042025_l1_init1.txt")

5 nonoverlappingCNN_sigmoid -> fcnn_decreasing_sigmoid
nonoverlappingCNN_sigmoid -> fcnn_decreasing_sigmoid: [0.0088, 0.0088, 0.0088, 0.0087, 0.0087] → Average Final Loss = 0.0088
5 nonoverlappingCNN_sigmoid -> fcnn_decreasing_relu
nonoverlappingCNN_sigmoid -> fcnn_decreasing_relu: [0.2879, 0.2777, 0.2749, 0.2791, 0.2756] → Average Final Loss = 0.2790
5 nonoverlappingCNN_sigmoid -> fcnn_decreasing_tanh
nonoverlappingCNN_sigmoid -> fcnn_decreasing_tanh: [0.029, 0.0288, 0.029, 0.029, 0.0288] → Average Final Loss = 0.0289
5 nonoverlappingCNN_relu -> fcnn_decreasing_sigmoid
nonoverlappingCNN_relu -> fcnn_decreasing_sigmoid: [3.3154, 3.3985, 3.4754, 3.4801, 3.2279] → Average Final Loss = 3.3795
5 nonoverlappingCNN_relu -> fcnn_decreasing_relu
nonoverlappingCNN_relu -> fcnn_decreasing_relu: [4.986, 5.0586, 5.1487, 5.1929, 4.8674] → Average Final Loss = 5.0507
5 nonoverlappingCNN_relu -> fcnn_decreasing_tanh
nonoverlappingCNN_relu -> fcnn_decreasing_tanh: [3.3907, 3.5049, 3.5351, 3.5345, 3.29

In [12]:
import torch

def load_model_weights(model_path):
    """Load model weights from a .pth file."""
    checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
    model_state = checkpoint["student_model_state_dict"]
    return {k: v.cpu().numpy() for k, v in model_state.items() if "weight" in k}

# compute level of sparsity of model
def compute_sparsity(weights_dict):
    """Compute the sparsity of model weights."""
    sparsity = {}
    for layer_name, weights in weights_dict.items():
        num_elements = weights.size
        num_zeros = (abs(weights) < 1e-4).sum()
        sparsity[layer_name] = num_zeros / num_elements    
    return sparsity

In [None]:
import os

def create_all_heatmaps(directory):
    model_sparsities = {}  # model filename → average sparsity
    best_model = None
    best_sparsity = float("-inf")

    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(".pth"):
                continue

            model_path = os.path.join(root, file)
            print(f"\nLoading weights from {model_path}...")

            weights_dict = load_model_weights(model_path)
            sparsity = compute_sparsity(weights_dict)
            avg_sparsity = sum(sparsity.values()) / len(sparsity)

            model_sparsities[file] = avg_sparsity

            for layer_name, sparsity_value in sparsity.items():
                print(f"Sparsity of {layer_name}: {sparsity_value * 100:.2f}%")

            print(f"→ Average sparsity for '{file}': {avg_sparsity * 100:.2f}%")

            if avg_sparsity > best_sparsity:
                best_sparsity = avg_sparsity
                best_model = file

    print("\n========== Summary ==========")
    for model_file, avg in model_sparsities.items():
        print(f"{model_file}: {avg * 100:.2f}% avg sparsity")

    return model_sparsities, best_model


In [16]:
create_all_heatmaps("../experiment_output/Experiment3/exp3_init0.2_5seeds")


Loading weights from ../experiment_output/Experiment3/exp3_init0.2_5seeds/experiments_02042025_overlapping_l1_init0.2_1/nonoverlappingCNN_relu__fcnn_decreasing_sigmoid.pth...
Sparsity of layers.0.weight: 0.03%
Sparsity of layers.1.weight: 0.23%
Sparsity of layers.2.weight: 0.00%
→ Average sparsity for 'nonoverlappingCNN_relu__fcnn_decreasing_sigmoid.pth': 0.09%

Loading weights from ../experiment_output/Experiment3/exp3_init0.2_5seeds/experiments_02042025_overlapping_l1_init0.2_1/nonoverlappingCNN_sigmoid__fcnn_decreasing_relu.pth...
Sparsity of layers.0.weight: 0.13%
Sparsity of layers.1.weight: 0.06%
Sparsity of layers.2.weight: 0.00%
→ Average sparsity for 'nonoverlappingCNN_sigmoid__fcnn_decreasing_relu.pth': 0.06%

Loading weights from ../experiment_output/Experiment3/exp3_init0.2_5seeds/experiments_02042025_overlapping_l1_init0.2_1/nonoverlappingCNN_sigmoid__fcnn_decreasing_tanh.pth...
Sparsity of layers.0.weight: 0.00%
Sparsity of layers.1.weight: 0.02%
Sparsity of layers.2.wei

({'nonoverlappingCNN_relu__fcnn_decreasing_sigmoid.pth': np.float64(0.0010850694444444443),
  'nonoverlappingCNN_sigmoid__fcnn_decreasing_relu.pth': np.float64(0.0007459852430555556),
  'nonoverlappingCNN_sigmoid__fcnn_decreasing_tanh.pth': np.float64(0.0006646050347222221),
  'nonoverlappingCNN_tanh__fcnn_decreasing_tanh.pth': np.float64(0.0013699001736111112),
  'nonoverlappingCNN_tanh__fcnn_decreasing_sigmoid.pth': np.float64(0.0008951822916666666),
  'nonoverlappingCNN_relu__fcnn_decreasing_tanh.pth': np.float64(0.0007459852430555555),
  'nonoverlappingCNN_relu__fcnn_decreasing_relu.pth': np.float64(0.0024278428819444445),
  'nonoverlappingCNN_sigmoid__fcnn_decreasing_sigmoid.pth': np.float64(0.0030788845486111106),
  'nonoverlappingCNN_tanh__fcnn_decreasing_relu.pth': np.float64(0.0008138020833333334),
  'nonoverlapping_CNN_all_tanh__fcnn_decreasing_all_tanh.pth': np.float64(0.0011121961805555557)},
 'nonoverlappingCNN_sigmoid__fcnn_decreasing_tanh.pth')

In [18]:
#extract the config setting from model path
path = "../experiment_output/Experiment2/perfect_results/experiments_succ_l1_1e-5_b16_lr0.01/nonoverlapping_CNN_relu__overlapping_CNN_relu.pth"

def load_model_weights(model_path):
    """Load model weights from a .pth file."""
    checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
    config = checkpoint["config"]
    print(config)

In [19]:
load_model_weights(path)

{'batch_size': 16, 'patience': 50, 'dataset_size': 1024, 'num_epochs': 30000, 'lr': [0.01], 'l1_norm': [1e-05], 'l2_norm': [0], 'loss_func': 'MSE', 'init': True, 'clipping': 0, 'same_act': True, 'save_path': '/Users/laura/Documents/MIT/Semesterproject/investigating_sparseNN/experiment_output'}
