In [4]:
import os
import yaml
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import pickle
import numpy as np
from tqdm import tqdm
from net import *
from connectome_utils import *
from sklearn.model_selection import StratifiedShuffleSplit

# Load config
with open("config.yaml", "r", encoding="utf-8") as f:
    # safe_load returns None for an empty document; fall back to an empty
    # mapping so every .get() below can still apply its default.
    config_data = yaml.safe_load(f) or {}

# Global parameters (the second argument of each .get() is the default used
# when the key is missing from config.yaml)
signed = config_data.get("signed", True)
sio = config_data.get("sio", True)
num_trials = config_data.get("num_trials", 10)
num_epochs = config_data.get("num_epochs", 10)
batch_size = config_data.get("batch_size", 64)
learning_rate = config_data.get("learning_rate", 0.001)
# `experiments:` with no children parses to None; treat that as "no experiments".
experiments = config_data.get("experiments") or {}

# Few-shot settings
fewshot_config = config_data.get("fewshot", {})
fewshot_enabled = fewshot_config.get("enabled", False)
fewshot_samples = fewshot_config.get("samples", 60)
fewshot_batch_size = fewshot_config.get("batch_size", 10)
if fewshot_enabled:
    # Swap every configured experiment for a few-shot variant: same settings
    # plus the few-shot keys, registered under an id that carries the sample
    # count.
    fewshot_experiments = {
        f"{exp_id}_fewshot_{fewshot_samples}": {
            **exp_config,
            "fewshot": fewshot_samples,
            "fewshot_batch_size": fewshot_batch_size,
        }
        for exp_id, exp_config in experiments.items()
    }
    experiments = fewshot_experiments

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def create_fewshot_subset(dataset, seed, samples_per_class=60):
    """Return a stratified random subset of ``dataset`` for few-shot training.

    The subset contains ``samples_per_class * n_classes`` items in total,
    where ``n_classes`` is the number of distinct labels in
    ``dataset.targets``. The split is stratified, so class proportions follow
    those of the full dataset; the per-class count therefore equals
    ``samples_per_class`` only approximately when classes are imbalanced.

    Args:
        dataset: Dataset exposing a ``targets`` sequence of class labels.
        seed: ``random_state`` passed to the splitter (same seed, same subset).
        samples_per_class: Nominal number of samples per class.

    Returns:
        A ``torch.utils.data.Subset`` over the selected indices.
    """
    targets = np.array(dataset.targets)
    n_classes = len(np.unique(targets))
    # Pass an absolute sample count rather than a fraction: a float
    # train_size is floored after multiplication by the dataset length, which
    # can lose one sample to floating-point rounding.
    train_size = int(samples_per_class * n_classes)
    sss = StratifiedShuffleSplit(n_splits=1, train_size=train_size, random_state=seed)
    indices, _ = next(sss.split(np.zeros_like(targets), targets))
    return torch.utils.data.Subset(dataset, indices)

def get_weight_matrix(base, mode):
    """Build an initial weight matrix from ``base`` according to ``mode``.

    Modes:
        'droso':          return ``base`` itself (the same object, no copy).
        'random':         dense float32 Gaussian matrix with the shape of
                          ``base``, divided by sqrt(base.shape[0]).
        'permuted_droso': float32 matrix holding the nonzero values of
                          ``base`` at uniformly random distinct positions.
        'randsparse':     float32 Gaussian values, scaled by
                          sqrt(nonzeros / size), at as many random positions
                          as ``base`` has nonzeros; zero elsewhere.

    Any other mode returns None. All randomness comes from the global NumPy
    random state.
    """
    if mode == 'droso':
        return base

    if mode == 'random':
        gaussian = np.random.randn(*base.shape)
        return (gaussian / np.sqrt(base.shape[0])).astype(np.float32)

    if mode == 'permuted_droso':
        values = base[base != 0].astype(np.float32)
        np.random.shuffle(values)
        positions = np.random.choice(base.size, len(values), replace=False)
        # Fill a flat buffer, then restore the original shape.
        flat = np.zeros(base.size, dtype=np.float32)
        flat[positions] = values
        return flat.reshape(base.shape)

    if mode == 'randsparse':
        n_keep = np.count_nonzero(base)
        mask = np.zeros(base.shape, dtype=np.float32)
        kept_positions = np.random.permutation(mask.size)[:n_keep]
        mask.flat[kept_positions] = 1
        scale = np.sqrt(n_keep / base.size)  # normalization factor
        dense = (np.random.randn(*base.shape) * scale).astype(np.float32)
        return dense * mask

    return None

def load_connectivity_info(cfg_data):
    """Load connectivity data using the loader selected by the global ``sio``.

    ``load_sio_connectivity_data`` is used when ``sio`` is truthy, otherwise
    ``load_connectivity_data``. Both receive the same arguments taken from
    ``cfg_data``: the ``csv_paths["signed"]`` connectivity file, the
    ``annotation_path`` and ``rescale_factor`` (default 4e-2).

    NOTE(review): the "signed" CSV path is read regardless of the global
    ``signed`` flag - confirm this is intended.
    """
    loader = load_sio_connectivity_data if sio else load_connectivity_data
    return loader(
        connectivity_path=cfg_data["csv_paths"]["signed"],
        annotation_path=cfg_data["annotation_path"],
        rescale_factor=cfg_data.get('rescale_factor', 4e-2),
    )

def load_datasets(transform):
    """Return the MNIST training set and a DataLoader over the MNIST test set.

    Both splits live under ``./data`` and use ``transform``. Only the training
    split is created with ``download=True``. The test loader uses batches of
    256 without shuffling.
    """
    root = './data'
    train_set = datasets.MNIST(root, train=True, download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(root, train=False, transform=transform),
        batch_size=256,
        shuffle=False,
    )
    return train_set, test_loader

def initialize_model(config):
    """Instantiate the model described by an experiment ``config``.

    ``config['type']`` selects the architecture:

    * ``'basicrnn'``: a ``BasicRNN`` whose recurrent weights come from
      ``get_weight_matrix`` applied to the loaded connectivity matrix with
      ``config['init']``; optional ``trainable``, ``pruning``, ``lambda_l1``
      and ``lora`` (``enabled``/``rank``/``alpha``) settings are forwarded.
    * ``'threehiddenmlp'``: a ``ThreeHiddenMLP`` with fixed layer sizes and
      the optional ``freeze`` flag.

    Raises:
        KeyError: if ``config`` has no ``'type'`` entry.
        ValueError: if the type is not one of the above.
    """
    model_type = config['type']

    if model_type == 'threehiddenmlp':
        return ThreeHiddenMLP(784, 29, 147, 400, 10, config.get('freeze', False))

    if model_type != 'basicrnn':
        raise ValueError(f"Unknown model type: {config['type']}")

    conn = load_connectivity_info(config_data)
    # conn['W'] is the rearranged connectivity matrix
    W_init = get_weight_matrix(conn['W'], config.get('init'))

    # LoRA settings are optional; defaults apply when the section is absent.
    lora_config = config.get('lora', {})

    return BasicRNN(
        W_init=W_init,
        input_dim=784,
        sensory_dim=conn['W_ss'].shape[0],
        internal_dim=conn['W_rr'].shape[0],
        output_dim=conn['W_oo'].shape[0],
        num_classes=10,
        trainable=config.get('trainable'),
        pruning=config.get('pruning'),
        target_nonzeros=np.count_nonzero(W_init),
        lambda_l1=config.get('lambda_l1'),
        use_lora=lora_config.get('enabled', False),
        lora_rank=lora_config.get('rank', 8),
        lora_alpha=lora_config.get('alpha', 16),
    )

def train_epoch(model, optimizer, criterion, train_loader):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch is moved to the module-level ``device``, run through the model
    once, and optimized with ``criterion``. When the model has a truthy
    ``pruning`` attribute and a non-None ``lambda_l1``, the penalty
    ``lambda_l1 * model.get_l1_loss()`` is added to the loss. After the last
    batch, ``model.enforce_sparsity()`` is called if the model provides it and
    pruning is enabled.

    Args:
        model: Network to train; ``pruning`` and ``lambda_l1`` are optional.
        optimizer: Optimizer stepping the model parameters.
        criterion: Classification loss taking ``(output, target)``.
        train_loader: Iterable of ``(data, target)`` batches.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen, or ``(0.0, 0.0)`` if
        the loader produced no samples.
    """
    model.train()
    # Models without pruning support may not define these attributes.
    pruning = getattr(model, "pruning", False)
    lambda_l1 = getattr(model, "lambda_l1", None)

    total_loss, correct, total = 0.0, 0, 0
    pbar = tqdm(train_loader, unit="batch", desc="Training")
    for data, target in pbar:
        # The previous `data.squeeze(1)` discarded its result, so the model
        # has always received the batch unsqueezed; that shape is kept here.
        # The inputs are now moved to the same device as the targets.
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        # Single forward pass; its output feeds both the loss and the accuracy.
        output = model(data)
        loss = criterion(output, target)

        # L1-penalized training loss to preserve the sparsity level
        if pruning and lambda_l1 is not None:
            loss = loss + lambda_l1 * model.get_l1_loss()

        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.size(0)
        correct += output.argmax(dim=1).eq(target).sum().item()
        total += data.size(0)
        train_acc = correct / total if total else 0
        pbar.set_postfix(loss=f"{loss.item():.4f}", train_acc=f"{train_acc:.2%}")

    if hasattr(model, "enforce_sparsity") and pruning:
        print("enforce sparsity start, nonzeros: ", torch.count_nonzero(model.W).item())
        model.enforce_sparsity()
        print("enforce sparsity end, nonzeros: ", torch.count_nonzero(model.W).item())

    if total == 0:
        # Empty loader: report zeros instead of dividing by zero.
        return 0.0, 0.0
    return total_loss / total, correct / total

def evaluate(model, test_loader):
    """Return the classification accuracy of ``model`` on ``test_loader``.

    The model is put in eval mode and run without gradient tracking. Each
    batch is moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network producing per-class scores of shape (batch, classes).
        test_loader: Iterable of ``(data, target)`` batches.

    Returns:
        Fraction of correctly classified samples, or 0 if there were none.
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for data, target in test_loader:
            # The previous `data.squeeze(1)` discarded its result (a no-op),
            # so the input shape is unchanged; inputs now follow the targets
            # onto the compute device.
            data, target = data.to(device), target.to(device)
            output = model(data)
            correct += output.argmax(dim=1).eq(target).sum().item()
            total += target.size(0)
    return correct / total if total > 0 else 0

def _count_submodule_nonzeros(model):
    """Count nonzero parameter entries per direct child, plus a model-wide 'total'."""
    counts = {
        name: sum(
            torch.count_nonzero(param).item()
            for param in submodule.parameters(recurse=False)
        )
        for name, submodule in model.named_children()
    }
    counts['total'] = sum(torch.count_nonzero(p).item() for p in model.parameters())
    return counts


def run_training_loop(model, config, full_train_set, test_loader, trial_num, num_epochs, batch_size, fewshot_batch_size):
    """Train ``model`` for ``num_epochs`` epochs and collect per-epoch metrics.

    The model is evaluated once before training (epoch 0) and after every
    epoch. Training uses Adam with the module-level ``learning_rate`` and
    cross-entropy loss.

    Args:
        model: Network to train; a truthy ``use_lora`` attribute (optional)
            enables the per-epoch similarity metrics.
        config: Experiment configuration. If it contains ``"fewshot"``, each
            epoch trains on a subset from ``create_fewshot_subset``.
        full_train_set: Complete training dataset.
        test_loader: Loader used for every evaluation.
        trial_num: Trial index, used only in log messages.
        num_epochs: Number of training epochs.
        batch_size: Batch size for full-dataset training.
        fewshot_batch_size: Batch size for few-shot training when ``config``
            has no ``"fewshot_batch_size"`` entry.

    Returns:
        Dict with lists ``epoch_train_loss``, ``epoch_train_acc``,
        ``submodules_nonzero`` (one entry per epoch), ``epoch_test_acc``
        (``num_epochs + 1`` entries, the first being the pre-training
        accuracy) and ``similarity_dict`` (filled only for LoRA models).
    """
    results = {"epoch_train_loss": [],
               "epoch_train_acc": [],
               "epoch_test_acc": [],
               'submodules_nonzero': [],
               'similarity_dict': []}
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Not every model type is guaranteed to define `use_lora`.
    use_lora = getattr(model, "use_lora", False)

    # initial evaluation for epoch 0
    init_acc = evaluate(model, test_loader)
    results["epoch_test_acc"].append(init_acc)

    print(f"Trial {trial_num} | Epoch 0 | Test Acc: {init_acc:.2%}")
    for epoch in range(num_epochs):
        if "fewshot" in config:
            # The epoch index is the split seed: every epoch draws a different
            # subset, and the sequence of subsets is the same for all trials.
            subset = create_fewshot_subset(full_train_set, epoch, config["fewshot"])
            train_loader = torch.utils.data.DataLoader(
                subset,
                batch_size=config.get("fewshot_batch_size", fewshot_batch_size),
                shuffle=True,
            )
        else:
            train_loader = torch.utils.data.DataLoader(full_train_set, batch_size=batch_size, shuffle=True)

        epoch_loss, epoch_acc = train_epoch(model, optimizer, criterion, train_loader)

        results["epoch_train_loss"].append(epoch_loss)
        results["epoch_train_acc"].append(epoch_acc)

        if use_lora:
            similarity_metrics = model.calculate_matrix_similarity()
            results['similarity_dict'].append(similarity_metrics)
            print(f"similarity metrics: {similarity_metrics}")

        test_acc = evaluate(model, test_loader)

        # save for further flops calculation
        submodule_nonzero_dict = _count_submodule_nonzeros(model)
        results['submodules_nonzero'].append(submodule_nonzero_dict)
        results["epoch_test_acc"].append(test_acc)

        print(f"submodule nonzero values: {submodule_nonzero_dict}")
        print(f"Trial {trial_num} | Epoch {epoch+1} | Test Acc: {test_acc:.2%}")

    return results

def save_results(exp_id, config, trial_num, results, signed):
    """Pickle ``results`` into the ``results`` directory.

    The file is named ``<exp_id>_trial<trial_num>.pkl``, with ``.signed``
    inserted before the extension when ``signed`` is truthy. The directory is
    created if it does not exist.

    Args:
        exp_id: Experiment identifier used as the filename prefix.
        config: Experiment configuration. Kept for interface compatibility;
            it does not influence the filename (the former few-shot branch
            produced the same name as the default).
        trial_num: Trial index embedded in the filename.
        results: Picklable object to store.
        signed: Whether to add the ``.signed`` marker to the filename.
    """
    os.makedirs("results", exist_ok=True)
    suffix = ".signed.pkl" if signed else ".pkl"
    path = os.path.join("results", f"{exp_id}_trial{trial_num}{suffix}")
    with open(path, "wb") as f:
        pickle.dump(results, f)

def train_experiment(exp_id, config, trial_num):
    """Run one trial of one experiment end to end and save its results.

    Prints the configuration, seeds the random number generators, loads
    MNIST, builds the model on the module-level ``device``, trains it with
    ``run_training_loop`` and stores the metrics via ``save_results``.

    Args:
        exp_id: Experiment identifier (used for logging and the result file).
        config: Experiment configuration mapping.
        trial_num: Trial index; also used as the random seed.
    """
    print("========================================")
    print(f"Starting Experiment: {exp_id} Trial {trial_num}")
    print("Experiment configuration:")
    for key, value in config.items():
        print(f"  {key}: {value}")
    print("========================================\n")
    torch.manual_seed(trial_num) # TODO
    # Seed NumPy with the trial number as well: the initial weight matrices
    # are drawn from NumPy's global RNG, and an argument-less np.random.seed()
    # reseeds from OS entropy, which made trials impossible to reproduce.
    np.random.seed(trial_num)
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    full_train_set, test_loader = load_datasets(transform)
    model = initialize_model(config)
    model.to(device)
    results = run_training_loop(model, config, full_train_set, test_loader, trial_num,
                                  num_epochs, batch_size, fewshot_batch_size)
    save_results(exp_id, config, trial_num, results, signed)

if __name__ == "__main__":
    # Run every configured experiment for trials 1..num_trials.
    trial_numbers = range(1, num_trials + 1)
    for exp_id, config in experiments.items():
        for trial_num in trial_numbers:
            print(f"\n=== Training {exp_id} Trial {trial_num} ===")
            train_experiment(exp_id, config, trial_num)



=== Training Learnable_RNN_Same_Sparsity Trial 1 ===
Starting Experiment: Learnable_RNN_Same_Sparsity Trial 1
Experiment configuration:
  type: basicrnn
  trainable: True
  init: permuted_droso
  pruning: True
  lambda_l1: 0.0001

Annotation file: Found 29 sensory neuron IDs
Annotation file: Found 400 output neuron IDs
Connectivity matrix contains 2952 neurons
After filtering, found 29 sensory neurons in matrix
After filtering, found 400 output neurons in matrix
Remaining 2523 neurons classified as internal
BasicRNN init: trainable=True, pruning=True, target_nonzeros=63545, lambda_l1=0.0001
LoRA config: use_lora=False, rank=8, alpha=16
W_init.shape: (2952, 2952), sensory_dim: 29, internal_dim: 2523, output_dim: 400
Trial 1 | Epoch 0 | Test Acc: 10.30%


Training: 100%|██████████| 938/938 [02:43<00:00,  5.73batch/s, loss=0.4610, train_acc=80.48%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 1 | Test Acc: 91.95%


Training: 100%|██████████| 938/938 [02:44<00:00,  5.72batch/s, loss=0.4897, train_acc=92.11%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 2 | Test Acc: 92.30%


Training: 100%|██████████| 938/938 [02:44<00:00,  5.71batch/s, loss=0.4677, train_acc=93.29%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 3 | Test Acc: 93.63%


Training: 100%|██████████| 938/938 [02:44<00:00,  5.72batch/s, loss=0.3838, train_acc=93.87%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 4 | Test Acc: 93.82%


Training: 100%|██████████| 938/938 [02:42<00:00,  5.77batch/s, loss=0.3271, train_acc=94.31%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 5 | Test Acc: 94.30%


Training: 100%|██████████| 938/938 [02:41<00:00,  5.81batch/s, loss=0.2616, train_acc=94.66%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 6 | Test Acc: 93.96%


Training: 100%|██████████| 938/938 [02:38<00:00,  5.91batch/s, loss=0.3040, train_acc=94.81%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 7 | Test Acc: 94.43%


Training: 100%|██████████| 938/938 [02:39<00:00,  5.90batch/s, loss=0.3796, train_acc=95.32%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 8 | Test Acc: 95.22%


Training: 100%|██████████| 938/938 [02:39<00:00,  5.90batch/s, loss=0.3186, train_acc=95.49%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 9 | Test Acc: 94.78%


Training: 100%|██████████| 938/938 [02:39<00:00,  5.89batch/s, loss=0.2140, train_acc=95.75%]


enforce sparsity start, nonzeros:  8714304
enforce sparsity end, nonzeros:  63546
submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 90321}
Trial 1 | Epoch 10 | Test Acc: 95.14%

=== Training Unlearnable_RNN_Same_Sparsity Trial 1 ===
Starting Experiment: Unlearnable_RNN_Same_Sparsity Trial 1
Experiment configuration:
  type: basicrnn
  init: permuted_droso
  trainable: False
  pruning: False

Annotation file: Found 29 sensory neuron IDs
Annotation file: Found 400 output neuron IDs
Connectivity matrix contains 2952 neurons
After filtering, found 29 sensory neurons in matrix
After filtering, found 400 output neurons in matrix
Remaining 2523 neurons classified as internal
BasicRNN init: trainable=False, pruning=False, target_nonzeros=63545, lambda_l1=None
LoRA config: use_lora=False, rank=8, alpha=16
W_init.shape: (2952, 2952), sensory_dim: 29, internal_dim: 2523, output_dim: 400
Trial 1 | Epoch 0 | Test Acc: 14.10%


Training: 100%|██████████| 938/938 [00:47<00:00, 19.56batch/s, loss=0.0857, train_acc=84.28%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 1 | Test Acc: 92.29%


Training: 100%|██████████| 938/938 [00:48<00:00, 19.52batch/s, loss=0.2520, train_acc=93.26%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 2 | Test Acc: 93.94%


Training: 100%|██████████| 938/938 [00:48<00:00, 19.53batch/s, loss=0.6014, train_acc=94.46%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 3 | Test Acc: 94.27%


Training: 100%|██████████| 938/938 [00:48<00:00, 19.45batch/s, loss=0.1107, train_acc=95.19%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 4 | Test Acc: 95.37%


Training: 100%|██████████| 938/938 [00:48<00:00, 19.49batch/s, loss=0.0774, train_acc=95.57%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 5 | Test Acc: 95.63%


Training: 100%|██████████| 938/938 [00:48<00:00, 19.41batch/s, loss=0.0864, train_acc=95.94%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 6 | Test Acc: 94.77%


Training: 100%|██████████| 938/938 [00:48<00:00, 19.41batch/s, loss=0.0745, train_acc=96.25%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 7 | Test Acc: 96.26%


Training: 100%|██████████| 938/938 [00:48<00:00, 19.52batch/s, loss=0.1264, train_acc=96.54%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 8 | Test Acc: 95.93%


Training: 100%|██████████| 938/938 [00:47<00:00, 19.58batch/s, loss=0.1956, train_acc=96.69%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 9 | Test Acc: 96.23%


Training: 100%|██████████| 938/938 [00:48<00:00, 19.48batch/s, loss=0.0327, train_acc=96.91%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 10 | Test Acc: 96.26%

=== Training Learnable_RNN_No_Sparsity Trial 1 ===
Starting Experiment: Learnable_RNN_No_Sparsity Trial 1
Experiment configuration:
  type: basicrnn
  trainable: True
  init: droso

Annotation file: Found 29 sensory neuron IDs
Annotation file: Found 400 output neuron IDs
Connectivity matrix contains 2952 neurons
After filtering, found 29 sensory neurons in matrix
After filtering, found 400 output neurons in matrix
Remaining 2523 neurons classified as internal
BasicRNN init: trainable=True, pruning=None, target_nonzeros=63545, lambda_l1=None
LoRA config: use_lora=False, rank=8, alpha=16
W_init.shape: (2952, 2952), sensory_dim: 29, internal_dim: 2523, output_dim: 400
Trial 1 | Epoch 0 | Test Acc: 9.31%


Training: 100%|██████████| 938/938 [02:17<00:00,  6.82batch/s, loss=0.1481, train_acc=63.62%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5600742}
Trial 1 | Epoch 1 | Test Acc: 90.33%


Training: 100%|██████████| 938/938 [02:18<00:00,  6.75batch/s, loss=0.3160, train_acc=92.03%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5613264}
Trial 1 | Epoch 2 | Test Acc: 93.69%


Training: 100%|██████████| 938/938 [02:18<00:00,  6.77batch/s, loss=0.6649, train_acc=94.16%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5615592}
Trial 1 | Epoch 3 | Test Acc: 94.24%


Training: 100%|██████████| 938/938 [02:18<00:00,  6.76batch/s, loss=0.1602, train_acc=94.89%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5619079}
Trial 1 | Epoch 4 | Test Acc: 94.63%


Training: 100%|██████████| 938/938 [02:19<00:00,  6.70batch/s, loss=0.0558, train_acc=95.27%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5623960}
Trial 1 | Epoch 5 | Test Acc: 95.18%


Training: 100%|██████████| 938/938 [02:19<00:00,  6.74batch/s, loss=0.1736, train_acc=95.65%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5624684}
Trial 1 | Epoch 6 | Test Acc: 95.03%


Training: 100%|██████████| 938/938 [02:19<00:00,  6.73batch/s, loss=0.2641, train_acc=95.84%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5626581}
Trial 1 | Epoch 7 | Test Acc: 95.37%


Training: 100%|██████████| 938/938 [02:19<00:00,  6.75batch/s, loss=0.2324, train_acc=95.67%] 


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5627584}
Trial 1 | Epoch 8 | Test Acc: 95.20%


Training: 100%|██████████| 938/938 [02:19<00:00,  6.73batch/s, loss=0.1967, train_acc=95.97%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5628240}
Trial 1 | Epoch 9 | Test Acc: 95.21%


Training: 100%|██████████| 938/938 [02:19<00:00,  6.74batch/s, loss=0.0424, train_acc=96.42%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 5629191}
Trial 1 | Epoch 10 | Test Acc: 95.35%

=== Training Unlearnable_RNN_No_Sparsity Trial 1 ===
Starting Experiment: Unlearnable_RNN_No_Sparsity Trial 1
Experiment configuration:
  type: basicrnn
  trainable: False
  init: random

Annotation file: Found 29 sensory neuron IDs
Annotation file: Found 400 output neuron IDs
Connectivity matrix contains 2952 neurons
After filtering, found 29 sensory neurons in matrix
After filtering, found 400 output neurons in matrix
Remaining 2523 neurons classified as internal
BasicRNN init: trainable=False, pruning=None, target_nonzeros=8714304, lambda_l1=None
LoRA config: use_lora=False, rank=8, alpha=16
W_init.shape: (2952, 2952), sensory_dim: 29, internal_dim: 2523, output_dim: 400
Trial 1 | Epoch 0 | Test Acc: 7.73%


Training: 100%|██████████| 938/938 [00:54<00:00, 17.18batch/s, loss=0.1147, train_acc=84.05%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 1 | Test Acc: 91.69%


Training: 100%|██████████| 938/938 [00:51<00:00, 18.04batch/s, loss=0.3877, train_acc=92.84%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 2 | Test Acc: 93.62%


Training: 100%|██████████| 938/938 [00:52<00:00, 17.90batch/s, loss=0.5409, train_acc=94.12%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 3 | Test Acc: 94.54%


Training: 100%|██████████| 938/938 [00:52<00:00, 18.02batch/s, loss=0.1079, train_acc=94.91%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 4 | Test Acc: 95.26%


Training: 100%|██████████| 938/938 [00:51<00:00, 18.15batch/s, loss=0.0757, train_acc=95.49%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 5 | Test Acc: 95.44%


Training: 100%|██████████| 938/938 [00:49<00:00, 18.87batch/s, loss=0.0756, train_acc=95.91%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 6 | Test Acc: 95.37%


Training: 100%|██████████| 938/938 [00:49<00:00, 18.83batch/s, loss=0.1561, train_acc=96.24%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 7 | Test Acc: 95.81%


Training: 100%|██████████| 938/938 [00:49<00:00, 18.84batch/s, loss=0.1783, train_acc=96.46%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 8 | Test Acc: 96.07%


Training: 100%|██████████| 938/938 [00:50<00:00, 18.69batch/s, loss=0.1880, train_acc=96.73%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 9 | Test Acc: 96.11%


Training: 100%|██████████| 938/938 [00:50<00:00, 18.72batch/s, loss=0.0735, train_acc=96.93%]


submodule nonzero values: {'input_proj': 22765, 'output_layer': 4010, 'activation': 0, 'total': 26775}
Trial 1 | Epoch 10 | Test Acc: 96.19%
