## Calculate the average loss and standard deviation of the loss

In [1]:
import os
import re
from collections import defaultdict
from statistics import mean, stdev

def parse_final_losses_from_file(filepath):
    """Collect the final losses reported in one experiment log file.

    The file content is split on delimiter runs of 80 ``=`` characters. A block
    contributes a loss only when it contains both a ``<teacher> -> <student>``
    entry and a ``Final Loss: <number>`` entry; the first occurrence of each
    within the block is used, and blocks missing either one are ignored.

    The number may be a plain decimal (``0.0312``), carry a sign, or use
    scientific notation (``1.5e-05``). Punctuation following the number (for
    example a sentence-ending period) is not treated as part of it.

    Args:
        filepath: Path of the text log to read.

    Returns:
        A ``defaultdict(list)`` mapping ``"<teacher> -> <student>"`` to the list
        of final losses found for that pair, in the order they appear in the file.
    """
    with open(filepath, 'r') as file:
        content = file.read()

    # Split content by blocks between delimiter lines
    blocks = content.split("=" * 80)

    # A character class such as [0-9.]+ would cut "1.5e-05" down to "1.5" and
    # would swallow a trailing "." (making float() fail), so match a real
    # float literal: optional sign, mantissa, optional exponent.
    loss_pattern = re.compile(
        r'Final Loss:\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'
    )

    model_losses = defaultdict(list)

    for block in blocks:
        model_match = re.search(r'([^\s]+)\s*->\s*([^\n]+)', block)
        loss_match = loss_pattern.search(block)

        if model_match and loss_match:
            teacher_model = model_match.group(1).strip()
            student_model = model_match.group(2).strip()
            loss = float(loss_match.group(1))

            # Aggregate by teacher -> student combination
            pair_key = f"{teacher_model} -> {student_model}"
            model_losses[pair_key].append(loss)
    return model_losses

def aggregate_losses(folder_path):
    """Summarise the final losses of every ``.txt`` log below ``folder_path``.

    The directory tree is walked recursively; each ``.txt`` file is parsed with
    ``parse_final_losses_from_file`` and the losses are pooled per
    teacher -> student key across all files.

    Args:
        folder_path: Root directory to search.

    Returns:
        dict mapping each pair key to ``(mean, sample standard deviation)`` of
        its pooled losses. The standard deviation is reported as ``0.0`` when a
        pair has only a single loss value.
    """
    losses_by_pair = defaultdict(list)

    for dirpath, _subdirs, filenames in os.walk(folder_path):
        for name in filenames:
            if not name.endswith('.txt'):
                continue
            parsed = parse_final_losses_from_file(os.path.join(dirpath, name))
            for pair, values in parsed.items():
                losses_by_pair[pair] += values

    def _spread(values):
        # stdev() needs at least two data points.
        return stdev(values) if len(values) > 1 else 0.0

    return {
        pair: (mean(values), _spread(values))
        for pair, values in losses_by_pair.items()
    }

In [17]:
# "init0.2" runs: pool the final losses of every log found under this folder.
folder = "../experiment_output/Experiment3/init0.2"
results = aggregate_losses(folder)

# One line per teacher -> student pair (sorted by pair name), 4 decimal places.
print("Average Final Loss ± Std per Teacher -> Student Model:")
for model_pair, (avg_loss, std_loss) in sorted(results.items()):
    print(f"{model_pair}: {avg_loss:.4f} ± {std_loss:.4f}")

Average Final Loss ± Std per Teacher -> Student Model:
nonoverlappingCNN_relu -> fcnn_decreasing_relu: 0.0003 ± 0.0001
nonoverlappingCNN_relu -> fcnn_decreasing_sigmoid: 3.3007 ± 0.0316
nonoverlappingCNN_relu -> fcnn_decreasing_tanh: 3.5434 ± 0.0319
nonoverlappingCNN_sigmoid -> fcnn_decreasing_relu: 0.0111 ± 0.0001
nonoverlappingCNN_sigmoid -> fcnn_decreasing_sigmoid: 0.0026 ± 0.0000
nonoverlappingCNN_sigmoid -> fcnn_decreasing_tanh: 0.2528 ± 0.0000
nonoverlappingCNN_tanh -> fcnn_decreasing_relu: 0.1941 ± 0.0010
nonoverlappingCNN_tanh -> fcnn_decreasing_sigmoid: 0.2940 ± 0.0994
nonoverlappingCNN_tanh -> fcnn_decreasing_tanh: 0.0004 ± 0.0000


In [18]:
# "init1" runs: pool the final losses of every log found under this folder.
# NOTE: this overwrites the `folder` and `results` globals set by the init0.2 cell.
folder = "../experiment_output/Experiment3/init1"
results = aggregate_losses(folder)

# One line per teacher -> student pair (sorted by pair name), 4 decimal places.
print("Average Final Loss ± Std per Teacher -> Student Model:")
for model_pair, (avg_loss, std_loss) in sorted(results.items()):
    print(f"{model_pair}: {avg_loss:.4f} ± {std_loss:.4f}")

Average Final Loss ± Std per Teacher -> Student Model:
nonoverlappingCNN_relu -> fcnn_decreasing_relu: 0.0003 ± 0.0001
nonoverlappingCNN_relu -> fcnn_decreasing_sigmoid: 3.2904 ± 0.0496
nonoverlappingCNN_relu -> fcnn_decreasing_tanh: 3.5330 ± 0.0501
nonoverlappingCNN_sigmoid -> fcnn_decreasing_relu: 0.0111 ± 0.0001
nonoverlappingCNN_sigmoid -> fcnn_decreasing_sigmoid: 0.0004 ± 0.0000
nonoverlappingCNN_sigmoid -> fcnn_decreasing_tanh: 0.2526 ± 0.0001
nonoverlappingCNN_tanh -> fcnn_decreasing_relu: 0.1938 ± 0.0013
nonoverlappingCNN_tanh -> fcnn_decreasing_sigmoid: 0.1850 ± 0.0015
nonoverlappingCNN_tanh -> fcnn_decreasing_tanh: 0.0004 ± 0.0000


## Analyse sparsity

In [14]:
import torch
import os

def load_model_weights(model_path):
    """Load the student model's weight tensors from a ``.pth`` checkpoint.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load`` that holds a
            ``"student_model_state_dict"`` entry.

    Returns:
        dict mapping every state-dict key that contains ``"weight"`` to its
        values as a NumPy array. Entries whose key does not contain ``"weight"``
        are dropped.

    Raises:
        KeyError: if the checkpoint has no ``"student_model_state_dict"`` entry.
    """
    # SECURITY: torch.load is pickle-based; only open checkpoints from a trusted source.
    checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
    model_state = checkpoint["student_model_state_dict"]
    # detach() first: Tensor.numpy() raises on tensors that still require grad.
    return {
        k: v.detach().cpu().numpy()
        for k, v in model_state.items()
        if "weight" in k
    }

# compute level of sparsity of model
def compute_sparsity(weights_dict, threshold=1e-4):
    """Compute the per-layer sparsity of model weights.

    A weight counts as zero when its absolute value is strictly smaller than
    ``threshold``; a layer's sparsity is the fraction of such weights.

    Args:
        weights_dict: Mapping of layer name to an array of weights. Each value
            must support ``abs()``, ``<`` comparison, ``.sum()`` and ``.size``
            (e.g. a NumPy array).
        threshold: Magnitude below which a weight is treated as zero.
            Defaults to ``1e-4``.

    Returns:
        dict mapping each layer name to its sparsity as a fraction in [0, 1].
    """
    sparsity = {}
    for layer_name, weights in weights_dict.items():
        num_elements = weights.size
        num_zeros = (abs(weights) < threshold).sum()
        sparsity[layer_name] = num_zeros / num_elements
    return sparsity

def compute_all_sparsity(directory):
    """Print the average weight sparsity of every ``.pth`` checkpoint under ``directory``.

    The directory tree is walked recursively. Each checkpoint is loaded with
    ``load_model_weights`` and scored with ``compute_sparsity``; its average is
    the unweighted mean of the per-layer sparsities, so every layer counts
    equally regardless of its size. A summary line per checkpoint is printed
    once all files have been processed.

    Results are keyed by file name only, so two checkpoints with the same name
    in different sub-directories overwrite each other in the summary.

    Args:
        directory: Root directory to search for ``.pth`` files.

    Returns:
        None. The results are only printed.
    """
    model_sparsities = {}  # model filename → average sparsity

    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(".pth"):
                continue

            model_path = os.path.join(root, file)

            weights_dict = load_model_weights(model_path)
            sparsity = compute_sparsity(weights_dict)

            if not sparsity:
                # No weight tensors in this checkpoint: there is nothing to
                # average, and dividing by len(sparsity) would raise
                # ZeroDivisionError.
                print(f"Skipping '{file}': no weight tensors found")
                continue

            model_sparsities[file] = sum(sparsity.values()) / len(sparsity)

    print("\n========== Summary ==========")
    for model_file, avg in model_sparsities.items():
        print(f"{model_file}: {avg * 100:.2f}% avg sparsity")

In [15]:
# "init0.2" runs: print the average weight sparsity of each .pth checkpoint under this folder.
compute_all_sparsity("../experiment_output/Experiment3/init0.2")


nonoverlappingCNN_relu__fcnn_decreasing_sigmoid.pth: 85.52% avg sparsity
nonoverlappingCNN_sigmoid__fcnn_decreasing_relu.pth: 84.43% avg sparsity
nonoverlappingCNN_sigmoid__fcnn_decreasing_tanh.pth: 98.69% avg sparsity
nonoverlappingCNN_tanh__fcnn_decreasing_tanh.pth: 95.27% avg sparsity
nonoverlappingCNN_tanh__fcnn_decreasing_sigmoid.pth: 66.67% avg sparsity
nonoverlappingCNN_relu__fcnn_decreasing_tanh.pth: 86.18% avg sparsity
nonoverlappingCNN_relu__fcnn_decreasing_relu.pth: 97.77% avg sparsity
nonoverlappingCNN_sigmoid__fcnn_decreasing_sigmoid.pth: 66.67% avg sparsity
nonoverlappingCNN_tanh__fcnn_decreasing_relu.pth: 76.62% avg sparsity


In [16]:
# "init1" runs: print the average weight sparsity of each .pth checkpoint under this folder.
compute_all_sparsity("../experiment_output/Experiment3/init1")


nonoverlappingCNN_relu__fcnn_decreasing_sigmoid.pth: 59.77% avg sparsity
nonoverlappingCNN_sigmoid__fcnn_decreasing_relu.pth: 88.36% avg sparsity
nonoverlappingCNN_sigmoid__fcnn_decreasing_tanh.pth: 98.42% avg sparsity
nonoverlappingCNN_tanh__fcnn_decreasing_tanh.pth: 95.28% avg sparsity
nonoverlappingCNN_tanh__fcnn_decreasing_sigmoid.pth: 63.82% avg sparsity
nonoverlappingCNN_relu__fcnn_decreasing_tanh.pth: 78.03% avg sparsity
nonoverlappingCNN_relu__fcnn_decreasing_relu.pth: 96.33% avg sparsity
nonoverlappingCNN_sigmoid__fcnn_decreasing_sigmoid.pth: 65.41% avg sparsity
nonoverlappingCNN_tanh__fcnn_decreasing_relu.pth: 78.80% avg sparsity
