In [None]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

# helper functions

In [None]:
def get_model_weights(model):
    """Snapshot the trainable parameters of ``model``.

    Returns a dict that maps every name yielded by
    ``model.named_parameters()`` to a cloned copy of that parameter's data.
    Parameters whose ``requires_grad`` flag is False are left out.
    """
    return {
        param_name: tensor.data.clone()
        for param_name, tensor in model.named_parameters()
        if tensor.requires_grad
    }

Test: snapshot the weights of a small sample model

In [None]:
# Define a sample model
class SampleModel(torch.nn.Module):
    """Minimal module holding two linear layers (10 -> 5 and 5 -> 2).

    No ``forward`` is defined; the class only provides named parameters
    to inspect.
    """

    def __init__(self):
        super().__init__()
        linear = torch.nn.Linear
        self.fc1 = linear(10, 5)
        self.fc2 = linear(5, 2)

# Build the sample network
model = SampleModel()

# Snapshot its trainable parameters with the helper
weights = get_model_weights(model)

# Report the name and shape of every captured tensor
for name in weights:
    weight = weights[name]
    print(f"Layer: {name}, Shape: {weight.shape}")

Layer: fc1.weight, Shape: torch.Size([5, 10])
Layer: fc1.bias, Shape: torch.Size([5])
Layer: fc2.weight, Shape: torch.Size([2, 5])
Layer: fc2.bias, Shape: torch.Size([2])


In [None]:
def find_dictionary_differences(dict1, dict2):
    """Return the entries whose values differ between two dictionaries.

    Only keys present in BOTH dictionaries are compared; a key that exists
    in just one of them is ignored.

    Tensor values are compared with ``torch.equal`` (same shape and same
    elements). A plain ``!=`` on tensors yields an element-wise tensor whose
    truth value is ambiguous, so dictionaries of model weights could not be
    compared before. A tensor paired with a non-tensor counts as different.

    Args:
        dict1: first mapping.
        dict2: second mapping.

    Returns:
        dict mapping each differing key to ``(value_in_dict1, value_in_dict2)``.
    """
    differences = {}

    for key, value1 in dict1.items():
        if key not in dict2:
            continue
        value2 = dict2[key]

        first_is_tensor = isinstance(value1, torch.Tensor)
        second_is_tensor = isinstance(value2, torch.Tensor)
        if first_is_tensor or second_is_tensor:
            # torch.equal collapses the comparison into a single Python bool.
            same = first_is_tensor and second_is_tensor and torch.equal(value1, value2)
            differs = not same
        else:
            differs = value1 != value2

        if differs:
            differences[key] = (value1, value2)

    return differences

Test: compare two small dictionaries

In [None]:
# Two small mappings that agree on 'a' and disagree on 'b' and 'c'
dict1 = dict(a=1, b=2, c=3)
dict2 = dict(a=1, b=4, c=5)

# Collect the mismatching entries and show them
result = find_dictionary_differences(dict1, dict2)
print(result)


{'b': (2, 4), 'c': (3, 5)}


# Inception-like model creation example

In [None]:
class InceptionLike3Stack(nn.Module):
    """Three stacked Inception-style blocks followed by a linear classifier.

    Each stack feeds the same input to four parallel branches
    (1x1 conv; 1x1 -> 3x3 conv; 1x1 -> 5x5 conv; 3x3 max-pool -> 1x1 conv)
    and concatenates their outputs along the channel dimension. The branch
    widths are 32, 64 and 128 for stacks 1, 2 and 3, so the stacks emit
    128, 256 and 512 channels respectively. Global average pooling and a
    single linear layer produce the class scores.

    Args:
        in_channels: number of channels of the input images.
        num_classes: size of the output of the final linear layer.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()

        # Stack 1: in_channels -> 4 * 32 = 128 channels
        (self.stack1_branch1x1,
         self.stack1_branch3x3,
         self.stack1_branch5x5,
         self.stack1_branch_pool) = self._make_stack(in_channels, 32)

        # Stack 2: 128 -> 4 * 64 = 256 channels
        (self.stack2_branch1x1,
         self.stack2_branch3x3,
         self.stack2_branch5x5,
         self.stack2_branch_pool) = self._make_stack(128, 64)

        # Stack 3: 256 -> 4 * 128 = 512 channels
        (self.stack3_branch1x1,
         self.stack3_branch3x3,
         self.stack3_branch5x5,
         self.stack3_branch_pool) = self._make_stack(256, 128)

        # Collapse each feature map to a single value per channel
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)

        # Classifier over the 512 pooled channels
        self.fc = nn.Linear(512, num_classes)

    @staticmethod
    def _make_stack(c_in, width):
        """Build the four branches of one stack (1x1, 3x3, 5x5, pool), in that order."""
        branch1x1 = nn.Sequential(
            nn.Conv2d(c_in, width, kernel_size=1),
            nn.ReLU(inplace=True),
        )
        branch3x3 = nn.Sequential(
            nn.Conv2d(c_in, width, kernel_size=1),
            nn.Conv2d(width, width, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        branch5x5 = nn.Sequential(
            nn.Conv2d(c_in, width, kernel_size=1),
            nn.Conv2d(width, width, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
        )
        branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(c_in, width, kernel_size=1),
            nn.ReLU(inplace=True),
        )
        return branch1x1, branch3x3, branch5x5, branch_pool

    @staticmethod
    def _run_stack(features, branches):
        """Apply every branch to ``features`` and concatenate along channels."""
        return torch.cat([branch(features) for branch in branches], dim=1)

    def forward(self, x):
        stacks = (
            (self.stack1_branch1x1, self.stack1_branch3x3,
             self.stack1_branch5x5, self.stack1_branch_pool),
            (self.stack2_branch1x1, self.stack2_branch3x3,
             self.stack2_branch5x5, self.stack2_branch_pool),
            (self.stack3_branch1x1, self.stack3_branch3x3,
             self.stack3_branch5x5, self.stack3_branch_pool),
        )

        features = x
        for branches in stacks:
            features = self._run_stack(features, branches)

        # Global average pooling, then flatten to (batch, channels)
        pooled = self.global_avg_pool(features)
        flattened = pooled.view(pooled.size(0), -1)

        return self.fc(flattened)

# Instantiate the Inception-like 3-stack network
in_channels = 1  # single input channel (e.g. grayscale images)
num_classes = 10  # size of the classifier output
model = InceptionLike3Stack(in_channels=in_channels, num_classes=num_classes)

# Show the registered sub-modules
print(model)


InceptionLike3Stack(
  (stack1_branch1x1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(inplace=True)
  )
  (stack1_branch3x3): Sequential(
    (0): Conv2d(1, 32, kernel_size=(1, 1), stride=(1, 1))
    (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): ReLU(inplace=True)
  )
  (stack1_branch5x5): Sequential(
    (0): Conv2d(1, 32, kernel_size=(1, 1), stride=(1, 1))
    (1): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (2): ReLU(inplace=True)
  )
  (stack1_branch_pool): Sequential(
    (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
    (1): Conv2d(1, 32, kernel_size=(1, 1), stride=(1, 1))
    (2): ReLU(inplace=True)
  )
  (stack2_branch1x1): Sequential(
    (0): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(inplace=True)
  )
  (stack2_branch3x3): Sequential(
    (0): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): Conv2d(64, 64, ker

# Dataset loading example

In [None]:
import torch
from torchvision import datasets, transforms
import random

# Seed every RNG this cell relies on. `random.seed` alone only fixes the
# subset sampling below; the DataLoader shuffle draws from torch's RNG, so it
# gets its own seeded generator to make the batch order repeatable as well.
SEED = 42
random.seed(SEED)
loader_rng = torch.Generator().manual_seed(SEED)

# Define a data transformation (convert images to PyTorch tensors)
transform = transforms.Compose([transforms.ToTensor()])

# Load the full MNIST training dataset
mnist_train_full = datasets.MNIST(root='./data', train=True, transform=transform, download=True)

# Load the full MNIST test dataset
mnist_test_full = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Create a random subset of 1000 training samples
train_indices = random.sample(range(len(mnist_train_full)), 1000)
mnist_train_subset = torch.utils.data.Subset(mnist_train_full, train_indices)

# Create a random subset of 100 test samples
test_indices = random.sample(range(len(mnist_test_full)), 100)
mnist_test_subset = torch.utils.data.Subset(mnist_test_full, test_indices)

# Create data loaders for batching
batch_size = 64
train_loader = torch.utils.data.DataLoader(
    mnist_train_subset, batch_size=batch_size, shuffle=True, generator=loader_rng
)
test_loader = torch.utils.data.DataLoader(mnist_test_subset, batch_size=batch_size, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 88968351.23it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 123860627.63it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 25431213.65it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 19262415.34it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






# Trainer without recording weight changes

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``; train if ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean per-batch loss, accuracy)``. Either value is NaN when the
        loader yields no batches, instead of raising ZeroDivisionError.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    correct = 0
    seen = 0
    batches = 0

    # Gradients are only tracked while training.
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            # Keep the batch on the same device as the model.
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            _, predicted = outputs.max(1)
            seen += labels.size(0)
            correct += (predicted == labels).sum().item()
            loss_sum += loss.item()
            batches += 1

    nan = float("nan")
    avg_loss = loss_sum / batches if batches else nan
    accuracy = correct / seen if seen else nan
    return avg_loss, accuracy


def train_model(model, train_loader, valid_loader, num_epochs, learning_rate):
    """Train ``model`` with Adam and cross-entropy loss, printing per-epoch metrics.

    Each epoch runs one training pass over ``train_loader`` followed by one
    evaluation pass over ``valid_loader``. Batches are moved to the device
    of the model's parameters before the forward pass.

    Args:
        model: module to optimise in place.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        valid_loader: iterable of ``(inputs, labels)`` validation batches.
        num_epochs: number of epochs to run.
        learning_rate: learning rate passed to ``optim.Adam``.

    Returns:
        None. Metrics are printed only.
    """
    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Adam has already rejected an empty parameter list, so this is safe.
    device = next(model.parameters()).device

    for epoch in range(num_epochs):
        avg_train_loss, train_accuracy = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer
        )
        avg_valid_loss, valid_accuracy = _run_epoch(
            model, valid_loader, criterion, device
        )

        # Print training and validation metrics for this epoch
        print(f"Epoch [{epoch + 1}/{num_epochs}]")
        print(f"Training Loss: {avg_train_loss:.4f} | Training Accuracy: {train_accuracy * 100:.2f}%")
        print(f"Validation Loss: {avg_valid_loss:.4f} | Validation Accuracy: {valid_accuracy * 100:.2f}%")
        print('-' * 40)

    print("Training completed.")

# Example usage:
# train_model(model, train_loader, valid_loader, num_epochs=10, learning_rate=0.001)


# sample run

In [None]:
#train_model(model, train_loader, test_loader, num_epochs=10, learning_rate=0.001)

# Trainer recording per-parameter gradient norms (a proxy for weight change)

In [None]:
# Prefer CUDA when it is present, otherwise stay on the CPU, then relocate the model
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Define training and evaluation functions
def train(model, train_loader, optimizer, criterion):
    """Train ``model`` for one pass over ``train_loader`` and log gradient norms.

    NOTE: despite the ``weight_changes`` naming, the quantity recorded for
    each parameter is the L2 norm of its gradient on the batch, not the
    difference between the weights before and after the update.

    Batches are moved to the device of the model's parameters, so the
    function does not depend on a notebook-level ``device`` variable.

    Args:
        model: module to optimise in place.
        train_loader: sized iterable of ``(inputs, targets)`` batches.
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: loss callable taking ``(outputs, targets)``.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, norms)`` where ``norms`` is a
        1-D float32 tensor with one entry per parameter, in
        ``model.parameters()`` order, averaged over the batches.
    """
    model.train()

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    total_loss = 0
    correct = 0
    total = 0
    weight_changes = []
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # One entry per parameter. A parameter without a gradient contributes
        # 0.0 rather than being skipped, so position i always refers to the
        # i-th parameter of the model.
        weight_changes.append([
            torch.norm(param.grad).item() if param.grad is not None else 0.0
            for param in model.parameters()
        ])

    return (
        total_loss / len(train_loader),
        correct / total,
        torch.mean(torch.tensor(weight_changes, dtype=torch.float32), dim=0)
    )

def test(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)  # Move data to GPU
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    return total_loss / len(test_loader), correct / total

# Data pipeline: convert to tensors, then normalise with mean 0.5 / std 0.5
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

model.to(device)

# Loss and plain SGD optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Run the epochs, keeping the per-parameter averages returned by train()
num_epochs = 5
avg_weight_changes_list = {}
for epoch in range(num_epochs):
    train_loss, train_acc, avg_weight_changes = train(model, train_loader, optimizer, criterion)
    test_loss, test_acc = test(model, test_loader, criterion)
    # Stored as a plain list so the record can be serialized
    avg_weight_changes_list[f"epoch{epoch + 1}"] = avg_weight_changes.tolist()
    print(
        f"Epoch [{epoch + 1}/{num_epochs}]",
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}",
        f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}",
        sep="\n",
    )
    #print("Average Weight Changes in Each Layer:")
    #for i, change in enumerate(avg_weight_changes):
        #print(f"Layer {i+1}: {change:.4f}")


Epoch [1/5]
Train Loss: 2.2945, Train Acc: 0.1121
Test Loss: 2.2807, Test Acc: 0.1151
Average Weight Changes in Each Layer:
Layer 1: 0.0024
Layer 2: 0.0027
Layer 3: 0.0010
Layer 4: 0.0011
Layer 5: 0.0249
Layer 6: 0.0020
Layer 7: 0.0014
Layer 8: 0.0017
Layer 9: 0.0554
Layer 10: 0.0027
Layer 11: 0.0027
Layer 12: 0.0032
Layer 13: 0.0392
Layer 14: 0.0073
Layer 15: 0.0240
Layer 16: 0.0045
Layer 17: 0.0529
Layer 18: 0.0079
Layer 19: 0.0213
Layer 20: 0.0040
Layer 21: 0.0717
Layer 22: 0.0069
Layer 23: 0.0466
Layer 24: 0.0076
Layer 25: 0.0559
Layer 26: 0.0215
Layer 27: 0.0317
Layer 28: 0.0122
Layer 29: 0.0773
Layer 30: 0.0220
Layer 31: 0.0315
Layer 32: 0.0121
Layer 33: 0.1157
Layer 34: 0.0214
Layer 35: 0.0671
Layer 36: 0.0213
Layer 37: 0.1719
Layer 38: 0.1167
Epoch [2/5]
Train Loss: 2.1434, Train Acc: 0.2090
Test Loss: 1.9497, Test Acc: 0.2923
Average Weight Changes in Each Layer:
Layer 1: 0.0230
Layer 2: 0.0304
Layer 3: 0.0089
Layer 4: 0.0133
Layer 5: 0.2133
Layer 6: 0.0181
Layer 7: 0.0467
Lay

In [None]:
# Per-parameter totals accumulated over every recorded epoch.
# The list is sized from the model rather than a hard-coded 38, so it stays
# aligned with model.named_parameters() if the architecture changes.
l = [0] * len(list(model.parameters()))
for epoch_changes in avg_weight_changes_list.values():
    for idx, change in enumerate(epoch_changes):
        l[idx] += change

In [None]:
# Show every parameter next to its accumulated value from `l`
for idx, (name, param) in enumerate(model.named_parameters()):
    print(f"Layer{idx}")
    print(f"Parameter Name: {name}")
    print(f"Parameter Shape: {param.shape}")
    print(l[idx])
    print()

Layer0
Parameter Name: stack1_branch1x1.0.weight
Parameter Shape: torch.Size([32, 1, 1, 1])
0.213584826560691

Layer1
Parameter Name: stack1_branch1x1.0.bias
Parameter Shape: torch.Size([32])
0.3241511220112443

Layer2
Parameter Name: stack1_branch3x3.0.weight
Parameter Shape: torch.Size([32, 1, 1, 1])
0.07263509999029338

Layer3
Parameter Name: stack1_branch3x3.0.bias
Parameter Shape: torch.Size([32])
0.10977848491165787

Layer4
Parameter Name: stack1_branch3x3.1.weight
Parameter Shape: torch.Size([32, 32, 3, 3])
1.9069428537040949

Layer5
Parameter Name: stack1_branch3x3.1.bias
Parameter Shape: torch.Size([32])
0.173592024249956

Layer6
Parameter Name: stack1_branch5x5.0.weight
Parameter Shape: torch.Size([32, 1, 1, 1])
0.3716834143269807

Layer7
Parameter Name: stack1_branch5x5.0.bias
Parameter Shape: torch.Size([32])
0.4333468029508367

Layer8
Parameter Name: stack1_branch5x5.1.weight
Parameter Shape: torch.Size([32, 32, 5, 5])
5.338232807815075

Layer9
Parameter Name: stack1_branc

# Trainer recording the actual weight change per update step (parameters after vs. before each optimizer step)

In [None]:
# Prefer CUDA when it is present, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Your model definition (replace 'model' with your actual model)
# For example, if you have a model named 'Net', you can instantiate it like this:
# model = Net()
# Then, move the model to the GPU if available:
# model.to(device)

# Define training and evaluation functions
def train(model, train_loader, optimizer, criterion):
    """Train ``model`` for one pass over ``train_loader`` and log weight deltas.

    After every optimizer step, the L2 norm of (parameter after the step -
    parameter before the step) is recorded for each parameter.

    Batches are moved to the device of the model's parameters, so the
    function does not depend on a notebook-level ``device`` variable.

    Args:
        model: module to optimise in place.
        train_loader: sized iterable of ``(inputs, targets)`` batches.
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: loss callable taking ``(outputs, targets)``.

    Returns:
        Tuple ``(mean per-batch loss, accuracy, changes)`` where ``changes``
        is a 1-D float32 tensor with one entry per parameter, in
        ``model.parameters()`` order, averaged over the batches.
    """
    model.train()

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    total_loss = 0
    correct = 0
    total = 0
    weight_changes = []
    # Snapshot of the parameters before the first update.
    prev_params = [param.detach().clone() for param in model.parameters()]
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # Size of this step for every parameter.
        current_params = [param.detach().clone() for param in model.parameters()]
        weight_changes.append([
            torch.norm(current - previous).item()
            for current, previous in zip(current_params, prev_params)
        ])
        prev_params = current_params  # baseline for the next batch

    return (
        total_loss / len(train_loader),
        correct / total,
        torch.mean(torch.tensor(weight_changes, dtype=torch.float32), dim=0)
    )

def test(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)  # Move data to GPU
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    return total_loss / len(test_loader), correct / total

# Data pipeline: convert to tensors, then normalise with mean 0.5 / std 0.5
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

# Place the existing model on the selected device
model.to(device)

# Loss and plain SGD optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Run the epochs, keeping the per-parameter averages returned by train()
num_epochs = 5
avg_weight_changes_list = {}
for epoch in range(num_epochs):
    train_loss, train_acc, avg_weight_changes = train(model, train_loader, optimizer, criterion)
    test_loss, test_acc = test(model, test_loader, criterion)
    # Stored as a plain list so the record can be serialized
    avg_weight_changes_list[f"epoch{epoch + 1}"] = avg_weight_changes.tolist()
    print(
        f"Epoch [{epoch + 1}/{num_epochs}]",
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}",
        f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}",
        sep="\n",
    )
    #print("Average Weight Changes in Each Layer:")
    #for i, change in enumerate(avg_weight_changes):
    #    print(f"Layer {i+1}: {change:.4f}")


Epoch [1/5]
Train Loss: 0.8032, Train Acc: 0.7444
Test Loss: 0.5363, Test Acc: 0.8309
Epoch [2/5]
Train Loss: 0.4491, Train Acc: 0.8639
Test Loss: 0.4054, Test Acc: 0.8516
Epoch [3/5]
Train Loss: 0.3217, Train Acc: 0.9036
Test Loss: 0.2631, Test Acc: 0.9152
Epoch [4/5]
Train Loss: 0.2599, Train Acc: 0.9224
Test Loss: 0.1929, Test Acc: 0.9415
Epoch [5/5]
Train Loss: 0.2219, Train Acc: 0.9341
Test Loss: 0.2285, Test Acc: 0.9273


In [None]:
def get_kernel_ave_change(avg_weight_changes_list, starting_epoch, stack_range, model=None):
    """Print recorded per-parameter changes grouped by branch kernel size.

    For every parameter, the values recorded in ``avg_weight_changes_list``
    are summed over the selected epochs. Parameters whose name starts with
    ``stack`` and whose stack number lies in ``stack_range`` are printed and
    grouped by the kernel tag in the name (``1x1``, ``3x3`` or ``5x5``).
    Parameters of other branches are printed but not grouped. Each group's
    size, members and mean are then printed.

    Args:
        avg_weight_changes_list: dict mapping an epoch key to a list with one
            value per parameter, in ``model.named_parameters()`` order.
        starting_epoch: 0-based position (in dict iteration order) of the
            first epoch to include; earlier epochs are skipped.
        stack_range: ``(low, high)`` inclusive bounds on the stack number,
            read from the single character right after the ``stack`` prefix.
        model: module whose parameter names are inspected. Defaults to the
            notebook-level ``model`` so existing three-argument calls still work.

    Returns:
        None. Results are printed only.
    """
    if model is None:
        model = globals()["model"]

    named_params = list(model.named_parameters())

    # Sum every parameter's recorded value over the selected epochs.
    l = [0] * len(named_params)
    for epoch, key in enumerate(avg_weight_changes_list):
        if epoch < starting_epoch:
            continue
        for j, change in enumerate(avg_weight_changes_list[key]):
            l[j] = l[j] + change

    kernel_1_average_weight_changes = []
    kernel_3_average_weight_changes = []
    kernel_5_average_weight_changes = []

    # `x` is the parameter's position among ALL parameters. It has to advance
    # for filtered-out parameters too; otherwise a range that skips stack 1
    # would read the entries of `l` that belong to other parameters.
    for x, (name, param) in enumerate(named_params):
        if not name.startswith("stack"):
            continue
        stack_number = int(name[5])
        if stack_number > stack_range[1] or stack_number < stack_range[0]:
            continue
        print("Layer"+str(x))
        print(f"Parameter Name: {name}")
        print(f"Parameter Shape: {param.shape}")
        print(l[x])
        print()
        # name[13:] is the text following the "stackN_branch" prefix.
        branch_tag = name[13:]
        if branch_tag.startswith("1x1"):
            kernel_1_average_weight_changes.append(l[x])
        elif branch_tag.startswith("3x3"):
            kernel_3_average_weight_changes.append(l[x])
        elif branch_tag.startswith("5x5"):
            kernel_5_average_weight_changes.append(l[x])

    kernel_average_weight_changes = {
        "kernal_1x1": kernel_1_average_weight_changes,
        "kernal_3x3": kernel_3_average_weight_changes,
        "kernal_5x5": kernel_5_average_weight_changes,
    }

    for i, values in kernel_average_weight_changes.items():
        print(i)
        print(len(values))
        print(values)
        # An empty group has no mean; print nan instead of dividing by zero.
        print(sum(values) / len(values) if values else float("nan"))
        print()



In [None]:
# Include every recorded epoch and every stack whose number lies in 0..3
get_kernel_ave_change(avg_weight_changes_list, starting_epoch=0, stack_range=(0, 3))

Layer0
Parameter Name: stack1_branch1x1.0.weight
Parameter Shape: torch.Size([32, 1, 1, 1])
0.0012695097539108247

Layer1
Parameter Name: stack1_branch1x1.0.bias
Parameter Shape: torch.Size([32])
0.0027390223112888634

Layer2
Parameter Name: stack1_branch3x3.0.weight
Parameter Shape: torch.Size([32, 1, 1, 1])
0.0010028609831351787

Layer3
Parameter Name: stack1_branch3x3.0.bias
Parameter Shape: torch.Size([32])
0.0013860615581506863

Layer4
Parameter Name: stack1_branch3x3.1.weight
Parameter Shape: torch.Size([32, 32, 3, 3])
0.02061924058943987

Layer5
Parameter Name: stack1_branch3x3.1.bias
Parameter Shape: torch.Size([32])
0.001982829999178648

Layer6
Parameter Name: stack1_branch5x5.0.weight
Parameter Shape: torch.Size([32, 1, 1, 1])
0.008709743968211114

Layer7
Parameter Name: stack1_branch5x5.0.bias
Parameter Shape: torch.Size([32])
0.00668087974190712

Layer8
Parameter Name: stack1_branch5x5.1.weight
Parameter Shape: torch.Size([32, 32, 5, 5])
0.07428365014493465

Layer9
Paramete