In [None]:
# Importing necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Load MNIST dataset
# ToTensor scales pixels to [0, 1]; Normalize with mean 0.5 / std 0.5 then maps them to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Arguments shared by the train and test splits.
mnist_kwargs = dict(root='./data', transform=transform, download=True)

# Training split, reshuffled on every pass over the loader.
trainset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Held-out split, iterated in a fixed order.
testset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

cuda
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 78499836.20it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 72104579.66it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 71913936.89it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz





Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 19823651.16it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



# Custom MLP - without dropout

In [None]:
class Perceptron(nn.Module):
  """A single neuron holding one trainable weight row and one trainable bias."""

  def __init__(self, num_inputs):
    """Create the neuron's parameters.

    Args:
      num_inputs: number of input features feeding this neuron.
    """
    super().__init__()
    # Allocate a (1, num_inputs) tensor, then fill it in place with
    # Xavier-uniform values before registering it as a parameter.
    initial_weights = torch.empty(1, num_inputs)
    nn.init.xavier_uniform_(initial_weights)
    self.weights = nn.Parameter(initial_weights)
    # The bias is a single value drawn from a standard normal distribution.
    self.bias = nn.Parameter(torch.randn(1))

  def propagate(self, inputs):
    """Return the pre-activation ``inputs . weights + bias``.

    ``torch.dot`` only accepts 1-D tensors, so ``inputs`` must be a flat
    vector with ``num_inputs`` elements. The result has shape ``(1,)``
    because the scalar dot product is broadcast against the bias.
    """
    flat_weights = self.weights.view(-1)
    return torch.dot(inputs, flat_weights) + self.bias

class MultiLayerPerceptron(nn.Module):
  """MLP (784 -> 128 -> 64 -> 10, ReLU activations) built from ``Perceptron`` neurons.

  Every neuron owns its own ``weights`` / ``bias`` parameters, so they show up
  individually in ``named_parameters()``. For the forward pass the neurons of
  a layer are assembled into one weight matrix and one bias vector.
  """

  def __init__(self):
    super(MultiLayerPerceptron, self).__init__()
    self.hidden_layer1 = nn.ModuleList([Perceptron(28*28) for i in range(128)])
    self.hidden_layer2 = nn.ModuleList([Perceptron(128) for i in range(64)])
    self.output_layer = nn.ModuleList([Perceptron(64) for i in range(10)])

  @staticmethod
  def _layer_parameters(layer):
    """Assemble one layer's neurons into ``(weights, bias)``.

    Each neuron contributes a ``(1, n_inputs)`` weight row and a ``(1,)``
    bias, so concatenating along dim 0 yields a ``(n_neurons, n_inputs)``
    matrix and a ``(n_neurons,)`` vector. ``torch.cat`` keeps the autograd
    link to the individual neuron parameters.
    """
    weights = torch.cat([perceptron.weights for perceptron in layer], dim=0)
    bias = torch.cat([perceptron.bias for perceptron in layer], dim=0)
    return weights, bias

  def propagate(self, inputs):
    """Compute class logits for a batch of images.

    Args:
      inputs: tensor whose elements can be reshaped to ``(batch, 28*28)``.

    Returns:
      Logits of shape ``(batch, n_outputs)``. The batch dimension is always
      kept, including for a single-sample batch, so the result can be passed
      straight to ``nn.CrossEntropyLoss`` or ``torch.max(..., 1)``.
    """
    # Flatten the input data
    inputs = inputs.reshape(-1, 28*28)

    hidden1_weights, hidden1_bias = self._layer_parameters(self.hidden_layer1)
    hidden2_weights, hidden2_bias = self._layer_parameters(self.hidden_layer2)
    output_weights, output_bias = self._layer_parameters(self.output_layer)

    # F.linear is batched, so the whole mini-batch is processed in one call
    # per layer instead of looping over the samples in Python.
    hidden = torch.relu(F.linear(inputs, hidden1_weights, hidden1_bias))
    hidden = torch.relu(F.linear(hidden, hidden2_weights, hidden2_bias))
    return F.linear(hidden, output_weights, output_bias)

  def forward(self, inputs):
    """Standard ``nn.Module`` entry point; delegates to ``propagate``."""
    return self.propagate(inputs)

In [None]:
# Per-experiment parameter snapshots, indexed as
# weight_distribution[experiment][epoch][parameter_index] -> list holding one tensor
# (bias_distribution has the same layout).
weight_distribution = []
bias_distribution = []
number_experiments = 10

# Settings that do not change between experiments.
num_epochs = 10
loss_function = nn.CrossEntropyLoss()

# NOTE(review): neither the model nor the batches are moved to a torch device in
# this cell, so training runs wherever they already live (CPU by default) even
# when CUDA is available -- confirm whether GPU training was intended.
for experiment in range(number_experiments):
    # Model Training: every experiment starts from a freshly initialised model.
    model = MultiLayerPerceptron()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Derive the slot count from the model itself rather than hard-coding the
    # neuron total, so the history layout always matches named_parameters().
    num_parameters = sum(1 for _ in model.parameters())
    num_neurons = num_parameters // 2  # each neuron owns one weights and one bias tensor

    # The slot index is the position in named_parameters(); a weights tensor
    # only ever fills weight_history and a bias tensor only bias_history, so
    # the other history's slot at that index stays empty.
    weight_history = [[[] for _ in range(num_parameters)] for _ in range(num_epochs)]
    bias_history = [[[] for _ in range(num_parameters)] for _ in range(num_epochs)]

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for images, labels in trainloader:
            optimizer.zero_grad()
            outputs = model.propagate(images)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        # Snapshot every parameter at the end of the epoch. detach().clone()
        # yields an independent copy that later optimizer steps cannot modify.
        for idx, (name, param) in enumerate(model.named_parameters()):
            snapshot = param.detach().clone()
            if name.endswith('bias'):
                bias_history[epoch][idx].append(snapshot)
            else:
                weight_history[epoch][idx].append(snapshot)

        print("Epoch:", epoch + 1, " Loss:", epoch_loss / len(trainloader))

    weight_distribution.append(weight_history)
    bias_distribution.append(bias_history)

Epoch: 1  Loss: 0.5601648842332079
Epoch: 2  Loss: 0.3081548534619656
Epoch: 3  Loss: 0.26011377833545335
Epoch: 4  Loss: 0.22806015506243782
Epoch: 5  Loss: 0.2031794820330354
Epoch: 6  Loss: 0.1830284744659975
Epoch: 7  Loss: 0.1672863937568849
Epoch: 8  Loss: 0.1538584763482054
Epoch: 9  Loss: 0.14243584793053074
Epoch: 10  Loss: 0.13199058028339133
Epoch: 1  Loss: 0.5649746936092626
Epoch: 2  Loss: 0.30023697545247546
Epoch: 3  Loss: 0.25186321703689313
Epoch: 4  Loss: 0.2187752928068516
Epoch: 5  Loss: 0.1955351133598511
Epoch: 6  Loss: 0.17745279865081248
Epoch: 7  Loss: 0.16270116122880343
Epoch: 8  Loss: 0.14964200557866839
Epoch: 9  Loss: 0.13955957656388662
Epoch: 10  Loss: 0.12994932497480213
Epoch: 1  Loss: 0.5845752139208413
Epoch: 2  Loss: 0.31106119013544337
Epoch: 3  Loss: 0.2629405044193969
Epoch: 4  Loss: 0.2311842833151187
Epoch: 5  Loss: 0.2077137244734238
Epoch: 6  Loss: 0.1882971264719804
Epoch: 7  Loss: 0.17245602893279688
Epoch: 8  Loss: 0.15918436561868007
Epoc

In [None]:
# Model Testing
# Count top-1 hits of `model` over the whole test loader and report the accuracy in percent.
correct = 0
total = 0
with torch.no_grad():  # evaluation only, no autograd graph needed
    for images, labels in testloader:
        outputs = model.propagate(images)
        # The index of the largest logit along dim 1 is the predicted class.
        predicted = torch.max(outputs.data, 1)[1]
        hits = predicted.eq(labels)
        total += labels.shape[0]
        correct += int(hits.sum())
accuracy1 = (correct / total)*100
print(accuracy1, "%")

95.61 %


# Kernel Density Estimation

## Gaussian Kernel

In [None]:
# # Importing the necessary libraries
# import numpy as np
# import matplotlib.pyplot as plt
# from scipy.stats import gaussian_kde

# # Bandwidth parameter (adjust this)
# bandwidth = 0.1

# def plot_kde(weights, name):
#     plt.figure(figsize=(8, 4))
#     plt.title(f'{name} - Epoch 1 - 10')

#     for epoch in range(num_epochs):
#         # Create a KDE object with Gaussian kernel
#         kde = gaussian_kde(weights[epoch][0][0], bw_method=bandwidth)

#         # Define a range of x values for the PDF
#         x_values = np.linspace(min(weights[epoch][0][0]), max(weights[epoch][0][0]), 1000)

#         # Calculate the estimated PDF at each x value
#         pdf_values = kde(x_values)

#         # Plot the KDE estimate
#         plt.plot(x_values, pdf_values, label=f'Epoch {epoch + 1}')
#     # plt.title(f'KDE with Gaussian Kernel - Epoch {epoch + 1}')
#     plt.xlabel('Weight')
#     plt.ylabel('Estimated Density')
#     plt.legend()
#     plt.show()

# # Loop through weight distributions for each epoch
# for idx, (name, param) in enumerate(model.named_parameters()):
#   if 'weight' in name:
#       plot_kde([weight_history[i][idx] for i in range(num_epochs)], name)

# KL Divergence of Weight Distributions among Neurons per Layer per Epoch

## Standard Deviation

### Top Down

In [None]:
# import matplotlib.pyplot as plt
# import torch
# from scipy.special import kl_div

# # Initialize lists to store standard deviations for each layer across epochs
# std_deviations_hidden1 = []
# std_deviations_hidden2 = []
# std_deviations_output = []

# # Loop over each epoch - 10 times
# for epoch in range(num_epochs):
#     kl_divergences = [[0.0] * 127, [0.0] * 63, [0.0] * 9]  # Store KL divergences for each layer
#     for experiment in range(number_experiments):
#       hidden1_idx = 0
#       hidden2_idx = 0
#       output_idx = 0
#       for idx, (name, param) in enumerate(model.named_parameters()):
#           if 'weight' in name:
#               if 'hidden_layer1' in name:
#                   if idx < 254:
#                       # Calculate the KL divergence
#                       kl_divergence = kl_div(weight_distribution[experiment][epoch][idx][0][0],
#                                             weight_distribution[experiment][epoch][idx + 2][0][0])
#                       kl_divergence = [kl.item() for kl in kl_divergence if kl != float('inf')]
#                       kl_divergence = sum(kl_divergence)
#                       kl_divergences[0][hidden1_idx] += kl_divergence
#                       hidden1_idx += 1
#               elif 'hidden_layer2' in name:
#                   if idx < 382:
#                       # Calculate the KL divergence
#                       kl_divergence = kl_div(weight_distribution[experiment][epoch][idx][0][0],
#                                             weight_distribution[experiment][epoch][idx + 2][0][0])
#                       kl_divergence = [kl.item() for kl in kl_divergence if kl != float('inf')]
#                       kl_divergence = sum(kl_divergence)
#                       kl_divergences[1][hidden2_idx] += kl_divergence
#                       hidden2_idx += 1
#               elif 'output_layer' in name:
#                   if idx < 402:
#                       # Calculate the KL divergence
#                       kl_divergence = kl_div(weight_distribution[experiment][epoch][idx][0][0],
#                                             weight_distribution[experiment][epoch][idx + 2][0][0])
#                       kl_divergence = [kl.item() for kl in kl_divergence if kl != float('inf')]
#                       kl_divergence = sum(kl_divergence)
#                       kl_divergences[2][output_idx] += kl_divergence
#                       output_idx += 1

#     # Divide each KL divergence by number_experiments (number of experiments)
#     for i in range(3):
#         kl_divergences[i] = [kl / number_experiments for kl in kl_divergences[i]]

#     # Calculate the standard deviation for each layer and store it for this epoch
#     std_hidden1 = torch.std(torch.tensor(kl_divergences[0]))
#     std_hidden2 = torch.std(torch.tensor(kl_divergences[1]))
#     std_output = torch.std(torch.tensor(kl_divergences[2]))

#     std_deviations_hidden1.append(std_hidden1.item())
#     std_deviations_hidden2.append(std_hidden2.item())
#     std_deviations_output.append(std_output.item())

# # Create a single subplot for all three layers
# fig, ax = plt.subplots(figsize=(8, 6))

# # Plot standard deviation of KL divergence for Hidden Layer 1
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden1, marker='o', linestyle='-', color='blue', label='Hidden Layer 1')

# # Plot standard deviation of KL divergence for Hidden Layer 2
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden2, marker='o', linestyle='-', color='green', label='Hidden Layer 2')

# # Plot standard deviation of KL divergence for Output Layer
# # ax.plot(range(1, num_epochs + 1), std_deviations_output, marker='o', linestyle='-', color='red', label='Output Layer')

# # Set titles and labels
# ax.set_title('Standard Deviation of KL Divergence - Top Down')
# ax.set_xlabel('Epoch')
# ax.set_ylabel('Standard Deviation')

# # Add a legend
# ax.legend()

# # Show the plot
# plt.tight_layout()
# plt.show()

### Bottom Up

In [None]:
# # Initialize lists to store standard deviations for each layer across epochs
# std_deviations_hidden1_reverse = []
# std_deviations_hidden2_reverse = []
# std_deviations_output_reverse = []

# # Loop over each epoch - 10 times
# for epoch in range(num_epochs):
#     kl_divergences_reverse = [[0.0] * 127, [0.0] * 63, [0.0] * 9]  # Store KL divergences for each layer
#     for experiment in range(number_experiments):
#       hidden1_idx = 0
#       hidden2_idx = 0
#       output_idx = 0
#       for idx, (name, param) in reversed(list(enumerate(model.named_parameters()))):
#           if 'weight' in name:
#               if 'hidden_layer1' in name:
#                   if idx > 3:
#                       # Calculate the KL divergence
#                       kl_divergence_reverse = kl_div(weight_distribution[experiment][epoch][idx][0][0],
#                                             weight_distribution[experiment][epoch][idx - 2][0][0])
#                       kl_divergence_reverse = [kl.item() for kl in kl_divergence_reverse if kl != float('inf')]
#                       kl_divergence_reverse = sum(kl_divergence_reverse)
#                       kl_divergences_reverse[0][hidden1_idx] += kl_divergence_reverse
#                       hidden1_idx += 1
#               elif 'hidden_layer2' in name:
#                   if idx > 256:
#                       # Calculate the KL divergence
#                       kl_divergence_reverse = kl_div(weight_distribution[experiment][epoch][idx][0][0],
#                                             weight_distribution[experiment][epoch][idx - 2][0][0])
#                       kl_divergence_reverse = [kl.item() for kl in kl_divergence_reverse if kl != float('inf')]
#                       kl_divergence_reverse = sum(kl_divergence_reverse)
#                       kl_divergences_reverse[1][hidden2_idx] += kl_divergence_reverse
#                       hidden2_idx += 1
#               elif 'output_layer' in name:
#                   if idx > 384:
#                       # Calculate the KL divergence
#                       kl_divergence_reverse = kl_div(weight_distribution[experiment][epoch][idx][0][0],
#                                             weight_distribution[experiment][epoch][idx - 2][0][0])
#                       kl_divergence_reverse = [kl.item() for kl in kl_divergence_reverse if kl != float('inf')]
#                       kl_divergence_reverse = sum(kl_divergence_reverse)
#                       kl_divergences_reverse[2][output_idx] += kl_divergence_reverse
#                       output_idx += 1

#     # Divide each KL divergence by number_experiments (number of experiments)
#     for i in range(3):
#         kl_divergences_reverse[i] = [kl / number_experiments for kl in kl_divergences_reverse[i]]

#     # Calculate the standard deviation for each layer and store it for this epoch
#     std_hidden1_reverse = torch.std(torch.tensor(kl_divergences_reverse[0]))
#     std_hidden2_reverse = torch.std(torch.tensor(kl_divergences_reverse[1]))
#     std_output_reverse = torch.std(torch.tensor(kl_divergences_reverse[2]))

#     std_deviations_hidden1_reverse.append(std_hidden1_reverse.item())
#     std_deviations_hidden2_reverse.append(std_hidden2_reverse.item())
#     std_deviations_output_reverse.append(std_output_reverse.item())

# # Create a single subplot for all three layers
# fig, ax = plt.subplots(figsize=(8, 6))

# # Plot standard deviation of KL divergence for Hidden Layer 1
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden1_reverse, marker='o', linestyle='-', color='blue', label='Hidden Layer 1')

# # Plot standard deviation of KL divergence for Hidden Layer 2
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden2_reverse, marker='o', linestyle='-', color='green', label='Hidden Layer 2')

# # Plot standard deviation of KL divergence for Output Layer
# # ax.plot(range(1, num_epochs + 1), std_deviations_output_reverse, marker='o', linestyle='-', color='red', label='Output Layer')

# # Set titles and labels
# ax.set_title('Standard Deviation of KL Divergence for All Layers')
# ax.set_xlabel('Epoch')
# ax.set_ylabel('Standard Deviation')

# # Add a legend
# ax.legend()

# # Show the plot
# plt.tight_layout()
# plt.show()

In [None]:
# # Hidden Layer 1
# fig, ax = plt.subplots(figsize=(8, 6))
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden1, marker='o', linestyle='-', color='blue', label='Top Down')
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden1_reverse, marker='o', linestyle='-', color='green', label='Bottom Up')

# # Set titles and labels
# ax.set_title('Standard Deviation of KL Divergence for Hidden Layer 1')
# ax.set_xlabel('Epoch')
# ax.set_ylabel('Standard Deviation')

# # Add a legend
# ax.legend()

# # Show the plot
# plt.tight_layout()
# plt.show()

In [None]:
# # Hidden Layer 2
# fig, ax = plt.subplots(figsize=(8, 6))
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden2, marker='o', linestyle='-', color='blue', label='Top Down')
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden2_reverse, marker='o', linestyle='-', color='green', label='Bottom Up')

# # Set titles and labels
# ax.set_title('Standard Deviation of KL Divergence for Hidden Layer 2')
# ax.set_xlabel('Epoch')
# ax.set_ylabel('Standard Deviation')

# # Add a legend
# ax.legend()

# # Show the plot
# plt.tight_layout()
# plt.show()

In [None]:
# from scipy.special import kl_div

# # Loop over each epoch - 10 times
# for epoch in range(num_epochs):
#   kl_divergences = [[0.0]*127, [0.0]*63, [0.0]*9]
#   hidden1_idx = 0
#   hidden2_idx = 0
#   output_idx = 0
#   for idx, (name, param) in enumerate(model.named_parameters()):
#     if 'weight' in name:
#       if 'hidden_layer1' in name:
#         if idx < 254:
#           # Calculate the KL divergence
#           kl_divergence = kl_div(weight_history[epoch][idx][0][0], weight_history[epoch][idx+2][0][0])
#           kl_divergence = [kl.item() for kl in kl_divergence if kl != float('inf')]
#           kl_divergence = sum(kl_divergence)
#           kl_divergences[0][hidden1_idx] = kl_divergence
#           hidden1_idx+=1
#       elif 'hidden_layer2' in name:
#         if idx < 382:
#           # Calculate the KL divergence
#           kl_divergence = kl_div(weight_history[epoch][idx][0][0], weight_history[epoch][idx+2][0][0])
#           kl_divergence = [kl.item() for kl in kl_divergence if kl != float('inf')]
#           kl_divergence = sum(kl_divergence)
#           kl_divergences[1][hidden2_idx] = kl_divergence
#           hidden2_idx+=1
#       elif 'output_layer' in name:
#         if idx < 402:
#           # Calculate the KL divergence
#           kl_divergence = kl_div(weight_history[epoch][idx][0][0], weight_history[epoch][idx+2][0][0])
#           kl_divergence = [kl.item() for kl in kl_divergence if kl != float('inf')]
#           kl_divergence = sum(kl_divergence)
#           kl_divergences[2][output_idx] = kl_divergence
#           output_idx+=1

#       # Define the neuron numbers
#   neuron_numbers_hidden1 = range(1, 128)
#   neuron_numbers_hidden2 = range(1, 64)
#   neuron_numbers_output = range(1, 10)

#   # Create subplots for each layer
#   fig, axs = plt.subplots(3, figsize=(8, 12))

#   # Plot KL divergences for hidden1 layer
#   axs[0].plot(neuron_numbers_hidden1, kl_divergences[0], marker='o', linestyle='-', color='blue')
#   axs[0].set_title('KL Divergence for Hidden Layer 1')
#   axs[0].set_xlabel('Neuron Number')
#   axs[0].set_ylabel('KL Divergence')

#   # Plot KL divergences for hidden2 layer
#   axs[1].plot(neuron_numbers_hidden2, kl_divergences[1], marker='o', linestyle='-', color='green')
#   axs[1].set_title('KL Divergence for Hidden Layer 2')
#   axs[1].set_xlabel('Neuron Number')
#   axs[1].set_ylabel('KL Divergence')

#   # Plot KL divergences for output layer
#   axs[2].plot(neuron_numbers_output, kl_divergences[2], marker='o', linestyle='-', color='red')
#   axs[2].set_title('KL Divergence for Output Layer')
#   axs[2].set_xlabel('Neuron Number')
#   axs[2].set_ylabel('KL Divergence')

#   # Add a title above all subplots with the epoch number
#   plt.suptitle(f'Epoch {epoch + 1}')

#   # Adjust layout
#   plt.tight_layout()

#   # Show the plot
#   plt.show()

# Prebuilt MLP

In [None]:
# # Define a simple feedforward neural network
# class NeuralNetwork(nn.Module):
#     def __init__(self):
#         super(NeuralNetwork, self).__init__()
#         self.fc1 = nn.Linear(28 * 28, 128)  # Input: 28x28 image, Output: 128
#         self.fc2 = nn.Linear(128, 64)      # Hidden layer: 128 -> 64
#         self.fc3 = nn.Linear(64, 10)      # Output: 64 -> 10 (10 classes for MNIST)

#     def forward(self, x):
#         x = x.view(-1, 28 * 28)  # Flatten the input
#         x = torch.relu(self.fc1(x))
#         x = torch.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x

In [None]:
# weight_distribution2 = []
# bias_distribution2 = []
# number_experiments = 1

# for experiment in range(number_experiments):
#   # Model Training
#   model2 = NeuralNetwork()
#   loss_function = nn.CrossEntropyLoss()
#   optimizer = optim.SGD(model2.parameters(), lr = 0.01)
#   num_epochs = 10
#   num_neurons = 202
#   num_parameters = num_neurons*2 # For weights and biases

#   weight_history2 = [[[] for _ in range(num_parameters)] for _ in range(num_epochs)]
#   bias_history2 = [[[] for _ in range(num_parameters)] for _ in range(num_epochs)]

#   for epoch in range(num_epochs):
#       epoch_loss = 0
#       for batch in trainloader:
#           images, labels = batch
#           optimizer.zero_grad()
#           outputs = model2(images)
#           loss = loss_function(outputs, labels)
#           loss.backward()
#           optimizer.step()
#           epoch_loss += loss.item()
#       for idx, param in enumerate(model2.named_parameters()):
#           if idx%2!=1:
#               weight_history2[epoch][idx].append(param[1].data.clone())
#           else:
#               bias_history2[epoch][idx].append(param[1].data.clone())

#       print("Epoch:", epoch + 1, " Loss:", epoch_loss / len(trainloader))

#   weight_distribution2.append(weight_history2)
#   bias_distribution2.append(bias_history2)

In [None]:
# # Model Testing
# correct = 0
# total = 0
# with torch.no_grad():
#   for batch in testloader:
#     images, labels = batch
#     outputs = model2(images)
#     _,predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted == labels).sum().item()
# accuracy2 = (correct / total)*100
# print(accuracy2, "%")

In [None]:
# import matplotlib.pyplot as plt
# import torch
# from scipy.special import kl_div

# # Initialize lists to store standard deviations for each layer across epochs
# std_deviations_hidden1 = []
# std_deviations_hidden2 = []
# std_deviations_output = []

# # Loop over each epoch - 10 times
# for epoch in range(num_epochs):
#     kl_divergences = [[0.0] * 127, [0.0] * 63, [0.0] * 9]  # Store KL divergences for each layer
#     for experiment in range(number_experiments):
#       hidden1_idx = 0
#       hidden2_idx = 0
#       output_idx = 0
#       for layer in range(0,5,2):
#         if layer == 0:
#           for neuron in range(127):
#             # Calculate the KL divergence
#             kl_divergence = kl_div(weight_distribution[experiment][epoch][layer][0][neuron],
#                                   weight_distribution[experiment][epoch][layer][0][neuron+1])
#             kl_divergence = [kl.item() for kl in kl_divergence if kl != float('inf')]
#             kl_divergence = sum(kl_divergence)
#             kl_divergences[0][hidden1_idx] += kl_divergence
#             hidden1_idx += 1
#         if layer == 2:
#           for neuron in range(63):
#             # Calculate the KL divergence
#             kl_divergence = kl_div(weight_distribution[experiment][epoch][layer][0][neuron],
#                                   weight_distribution[experiment][epoch][layer][0][neuron+1])
#             kl_divergence = [kl.item() for kl in kl_divergence if kl != float('inf')]
#             kl_divergence = sum(kl_divergence)
#             kl_divergences[1][hidden2_idx] += kl_divergence
#             hidden2_idx += 1
#         if layer == 4:
#           for neuron in range(9):
#             # Calculate the KL divergence
#             kl_divergence = kl_div(weight_distribution[experiment][epoch][layer][0][neuron],
#                                   weight_distribution[experiment][epoch][layer][0][neuron+1])
#             kl_divergence = [kl.item() for kl in kl_divergence if kl != float('inf')]
#             kl_divergence = sum(kl_divergence)
#             kl_divergences[2][output_idx] += kl_divergence
#             output_idx += 1

#     # Divide each KL divergence by number_experiments (number of experiments)
#     for i in range(3):
#         kl_divergences[i] = [kl / number_experiments for kl in kl_divergences[i]]

#     # Calculate the standard deviation for each layer and store it for this epoch
#     std_hidden1 = torch.std(torch.tensor(kl_divergences[0]))
#     std_hidden2 = torch.std(torch.tensor(kl_divergences[1]))
#     std_output = torch.std(torch.tensor(kl_divergences[2]))

#     std_deviations_hidden1.append(std_hidden1.item())
#     std_deviations_hidden2.append(std_hidden2.item())
#     std_deviations_output.append(std_output.item())

# # Create a single subplot for all three layers
# fig, ax = plt.subplots(figsize=(8, 6))

# # Plot standard deviation of KL divergence for Hidden Layer 1
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden1, marker='o', linestyle='-', color='blue', label='Hidden Layer 1')

# # Plot standard deviation of KL divergence for Hidden Layer 2
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden2, marker='o', linestyle='-', color='green', label='Hidden Layer 2')

# # Plot standard deviation of KL divergence for Output Layer
# # ax.plot(range(1, num_epochs + 1), std_deviations_output, marker='o', linestyle='-', color='red', label='Output Layer')

# # Set titles and labels
# ax.set_title('Standard Deviation of KL Divergence - Top Down')
# ax.set_xlabel('Epoch')
# ax.set_ylabel('Standard Deviation')

# # Add a legend
# ax.legend()

# # Show the plot
# plt.tight_layout()
# plt.show()

In [None]:
# # Initialize lists to store standard deviations for each layer across epochs
# std_deviations_hidden1_reverse = []
# std_deviations_hidden2_reverse = []
# std_deviations_output_reverse = []

# # Loop over each epoch - 10 times
# for epoch in range(num_epochs):
#     kl_divergences_reverse = [[0.0] * 127, [0.0] * 63, [0.0] * 9]  # Store KL divergences for each layer
#     for experiment in range(number_experiments):
#       hidden1_idx = 0
#       hidden2_idx = 0
#       output_idx = 0
#       for layer in range(0,5,2):
#         if layer == 0:
#           for neuron in range(127, 1, -1):
#             # Calculate the KL divergence
#             kl_divergence_reverse = kl_div(weight_distribution[experiment][epoch][layer][0][neuron],
#                                   weight_distribution[experiment][epoch][layer][0][neuron-1])
#             kl_divergence_reverse = [kl.item() for kl in kl_divergence_reverse if kl != float('inf')]
#             kl_divergence_reverse = sum(kl_divergence_reverse)
#             kl_divergences_reverse[0][hidden1_idx] += kl_divergence_reverse
#             hidden1_idx += 1
#         if layer == 2:
#           for neuron in range(63, 1, -1):
#             # Calculate the KL divergence
#             kl_divergence_reverse = kl_div(weight_distribution[experiment][epoch][layer][0][neuron],
#                                   weight_distribution[experiment][epoch][layer][0][neuron-1])
#             kl_divergence_reverse = [kl.item() for kl in kl_divergence_reverse if kl != float('inf')]
#             kl_divergence_reverse = sum(kl_divergence_reverse)
#             kl_divergences_reverse[1][hidden2_idx] += kl_divergence_reverse
#             hidden2_idx += 1
#         if layer == 4:
#           for neuron in range(9, 1, -1):
#             # Calculate the KL divergence
#             kl_divergence_reverse = kl_div(weight_distribution[experiment][epoch][layer][0][neuron],
#                                   weight_distribution[experiment][epoch][layer][0][neuron-1])
#             kl_divergence_reverse = [kl.item() for kl in kl_divergence_reverse if kl != float('inf')]
#             kl_divergence_reverse = sum(kl_divergence_reverse)
#             kl_divergences_reverse[2][output_idx] += kl_divergence_reverse
#             output_idx += 1

#     # Divide each KL divergence by number_experiments (number of experiments)
#     for i in range(3):
#         kl_divergences_reverse[i] = [kl / number_experiments for kl in kl_divergences_reverse[i]]

#     # Calculate the standard deviation for each layer and store it for this epoch
#     std_hidden1_reverse = torch.std(torch.tensor(kl_divergences_reverse[0]))
#     std_hidden2_reverse = torch.std(torch.tensor(kl_divergences_reverse[1]))
#     std_output_reverse = torch.std(torch.tensor(kl_divergences_reverse[2]))

#     std_deviations_hidden1_reverse.append(std_hidden1_reverse.item())
#     std_deviations_hidden2_reverse.append(std_hidden2_reverse.item())
#     std_deviations_output_reverse.append(std_output_reverse.item())

# # Create a single subplot for all three layers
# fig, ax = plt.subplots(figsize=(8, 6))

# # Plot standard deviation of KL divergence for Hidden Layer 1
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden1_reverse, marker='o', linestyle='-', color='blue', label='Hidden Layer 1')

# # Plot standard deviation of KL divergence for Hidden Layer 2
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden2_reverse, marker='o', linestyle='-', color='green', label='Hidden Layer 2')

# # Plot standard deviation of KL divergence for Output Layer
# # ax.plot(range(1, num_epochs + 1), std_deviations_output, marker='o', linestyle='-', color='red', label='Output Layer')

# # Set titles and labels
# ax.set_title('Standard Deviation of KL Divergence - Bottom Up')
# ax.set_xlabel('Epoch')
# ax.set_ylabel('Standard Deviation')

# # Add a legend
# ax.legend()

# # Show the plot
# plt.tight_layout()
# plt.show()

In [None]:
# # Hidden Layer 1
# fig, ax = plt.subplots(figsize=(8, 6))
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden1, marker='o', linestyle='-', color='blue', label='Top Down')
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden1_reverse, marker='o', linestyle='-', color='green', label='Bottom Up')

# # Set titles and labels
# ax.set_title('Standard Deviation of KL Divergence for Hidden Layer 1 - Prebuilt')
# ax.set_xlabel('Epoch')
# ax.set_ylabel('Standard Deviation')

# # Add a legend
# ax.legend()

# # Show the plot
# plt.tight_layout()
# plt.show()

In [None]:
# # Hidden Layer 2
# fig, ax = plt.subplots(figsize=(8, 6))
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden2, marker='o', linestyle='-', color='blue', label='Top Down')
# ax.plot(range(1, num_epochs + 1), std_deviations_hidden2_reverse, marker='o', linestyle='-', color='green', label='Bottom Up')

# # Set titles and labels
# ax.set_title('Standard Deviation of KL Divergence for Hidden Layer 2 - Prebuilt')
# ax.set_xlabel('Epoch')
# ax.set_ylabel('Standard Deviation')

# # Add a legend
# ax.legend()

# # Show the plot
# plt.tight_layout()
# plt.show()

# Custom MLP - with Dropout

In [None]:
class MultiLayerPerceptron2(nn.Module):
  """MLP for 28x28 images assembled from individual ``Perceptron`` modules, with dropout.

  Layout: 784 -> 128 -> 64 -> 10. Each hidden layer is followed by ReLU and
  dropout (p=0.5); the output layer returns raw scores (no activation).
  Dropout is only active while the module is in training mode
  (``self.training``), so call ``.eval()`` before evaluating.
  """

  def __init__(self):
    super(MultiLayerPerceptron2, self).__init__()
    self.hidden_layer1 = nn.ModuleList([Perceptron(28*28) for i in range(128)])
    self.hidden_layer2 = nn.ModuleList([Perceptron(128) for i in range(64)])
    self.output_layer = nn.ModuleList([Perceptron(64) for i in range(10)])

  @staticmethod
  def _layer_parameters(layer):
    """Stack the per-perceptron parameters of ``layer`` into one weight matrix and one bias vector."""
    # NOTE(review): assumes every perceptron exposes `weights` holding a single
    # row of input coefficients and a one-element `bias` -- confirm against Perceptron.
    weights = torch.stack([perceptron.weights for perceptron in layer]).squeeze(dim=1)
    bias = torch.stack([perceptron.bias for perceptron in layer]).view(-1)
    return weights, bias

  def propagate(self, inputs):
    """Run a forward pass.

    Args:
      inputs: tensor that can be viewed as ``(batch, 28*28)``.

    Returns:
      Tensor of shape ``(batch, 10)`` with one score per class. The batch
      dimension is always kept, including for a batch of one sample, so the
      result can be fed to ``nn.CrossEntropyLoss`` / ``torch.max(..., 1)``.
    """
    # Flatten the input data
    inputs = inputs.view(-1, 28*28)

    hidden1_weights, hidden1_bias = self._layer_parameters(self.hidden_layer1)
    hidden2_weights, hidden2_bias = self._layer_parameters(self.hidden_layer2)
    output_weights, output_bias = self._layer_parameters(self.output_layer)

    # F.linear operates on the whole batch at once, so no per-sample loop is needed.
    activations = torch.relu(F.linear(inputs, hidden1_weights, hidden1_bias))
    activations = F.dropout(activations, p=0.5, training=self.training)
    activations = torch.relu(F.linear(activations, hidden2_weights, hidden2_bias))
    activations = F.dropout(activations, p=0.5, training=self.training)
    return F.linear(activations, output_weights, output_bias)

In [None]:
# Per-experiment parameter snapshots: one entry per independent training run.
weight_distribution3 = []
bias_distribution3 = []
number_experiments = 10

for experiment in range(number_experiments):
  # Bookkeeping sizes: 202 = 128 + 64 + 10 perceptrons in MultiLayerPerceptron2,
  # and each one is counted twice (weights and bias).
  num_epochs = 10
  num_neurons = 202
  num_parameters = num_neurons*2 # For weights and biases

  # Fresh model, loss and optimizer for every run
  model3 = MultiLayerPerceptron2()
  loss_function = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model3.parameters(), lr = 0.01)

  # history[epoch][parameter index] -> list of tensor snapshots
  weight_history3 = [[[] for _ in range(num_parameters)] for _ in range(num_epochs)]
  bias_history3 = [[[] for _ in range(num_parameters)] for _ in range(num_epochs)]

  for epoch in range(num_epochs):
      epoch_loss = 0
      for images, labels in trainloader:
          optimizer.zero_grad()
          loss = loss_function(model3.propagate(images), labels)
          loss.backward()
          optimizer.step()
          epoch_loss += loss.item()

      # Snapshot every parameter at the end of the epoch. Even positions go to
      # the weight history and odd positions to the bias history.
      # NOTE(review): presumes named_parameters() yields weights then bias for
      # each perceptron -- confirm against the Perceptron definition.
      for idx, (_, values) in enumerate(model3.named_parameters()):
          history = bias_history3 if idx % 2 == 1 else weight_history3
          history[epoch][idx].append(values.data.clone())

      print("Epoch:", epoch + 1, " Loss:", epoch_loss / len(trainloader))

  weight_distribution3.append(weight_history3)
  bias_distribution3.append(bias_history3)

Epoch: 1  Loss: 1.4618864338408146
Epoch: 2  Loss: 0.8681789220078413
Epoch: 3  Loss: 0.7172107178328643
Epoch: 4  Loss: 0.6288272968487445
Epoch: 5  Loss: 0.5787643726065215
Epoch: 6  Loss: 0.5332080299602643
Epoch: 7  Loss: 0.5049786285233142
Epoch: 8  Loss: 0.48493274773107664
Epoch: 9  Loss: 0.4624874217392031
Epoch: 10  Loss: 0.4479015975046768
Epoch: 1  Loss: 1.4679734059996696
Epoch: 2  Loss: 0.8532824613201593
Epoch: 3  Loss: 0.6912846051172407
Epoch: 4  Loss: 0.6212256017015941
Epoch: 5  Loss: 0.5599839412835615
Epoch: 6  Loss: 0.5256278378718189
Epoch: 7  Loss: 0.49529236361289075
Epoch: 8  Loss: 0.47336639049274326
Epoch: 9  Loss: 0.4530291231567544
Epoch: 10  Loss: 0.4355112289124206
Epoch: 1  Loss: 1.4947346072715482
Epoch: 2  Loss: 0.8727517576614169
Epoch: 3  Loss: 0.7073542041374422
Epoch: 4  Loss: 0.6228729527769312
Epoch: 5  Loss: 0.5658058591171115
Epoch: 6  Loss: 0.5243596357227897
Epoch: 7  Loss: 0.4993816325500576
Epoch: 8  Loss: 0.4771771633517005
Epoch: 9  Loss:

In [None]:
# Model Testing
# Switch to evaluation mode first: propagate() applies dropout with
# training=self.training, so without this call half of the hidden activations
# are still zeroed at random while measuring accuracy.
model3.eval()
correct = 0
total = 0
with torch.no_grad():
  for images, labels in testloader:
    outputs = model3.propagate(images)
    # Index of the highest score along the class dimension is the predicted label
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
accuracy3 = (correct / total)*100
print(accuracy3, "%")

87.72999999999999 %


# Prebuilt MLP - with Dropout

In [None]:
# # Define a simple feedforward neural network
# class NeuralNetwork2(nn.Module):
#     def __init__(self):
#         super(NeuralNetwork2, self).__init__()
#         self.fc1 = nn.Linear(28 * 28, 128)  # Input: 28x28 image, Output: 128
#         self.fc2 = nn.Linear(128, 64)      # Hidden layer: 128 -> 64
#         self.fc3 = nn.Linear(64, 10)      # Output: 64 -> 10 (10 classes for MNIST)

#     def forward(self, x):
#         x = x.view(-1, 28 * 28)  # Flatten the input
#         x = torch.relu(self.fc1(x))
#         x = F.dropout(x, p=0.5, training=self.training)
#         x = torch.relu(self.fc2(x))
#         x = F.dropout(x, p=0.5, training=self.training)
#         x = self.fc3(x)
#         return x

In [None]:
# weight_distribution4 = []
# bias_distribution4 = []
# number_experiments = 1

# for experiment in range(number_experiments):
#   # Model Training
#   model4 = NeuralNetwork2()
#   loss_function = nn.CrossEntropyLoss()
#   optimizer = optim.SGD(model4.parameters(), lr = 0.01)
#   num_epochs = 10
#   num_neurons = 202
#   num_parameters = num_neurons*2 # For weights and biases

#   weight_history4 = [[[] for _ in range(num_parameters)] for _ in range(num_epochs)]
#   bias_history4 = [[[] for _ in range(num_parameters)] for _ in range(num_epochs)]

#   for epoch in range(num_epochs):
#       epoch_loss = 0
#       for batch in trainloader:
#           images, labels = batch
#           optimizer.zero_grad()
#           outputs = model4(images)
#           loss = loss_function(outputs, labels)
#           loss.backward()
#           optimizer.step()
#           epoch_loss += loss.item()
#       for idx, param in enumerate(model4.named_parameters()):
#           if idx%2!=1:
#               weight_history4[epoch][idx].append(param[1].data.clone())
#           else:
#               bias_history4[epoch][idx].append(param[1].data.clone())

#       print("Epoch:", epoch + 1, " Loss:", epoch_loss / len(trainloader))

#   weight_distribution4.append(weight_history4)
#   bias_distribution4.append(bias_history4)

In [None]:
# # Model Testing
# correct = 0
# total = 0
# with torch.no_grad():
#   for batch in testloader:
#     images, labels = batch
#     outputs = model4(images)
#     _,predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted == labels).sum().item()
# accuracy4 = (correct / total)*100
# print(accuracy4, "%")

In [None]:
# # Importing the necessary libraries
# import numpy as np
# import matplotlib.pyplot as plt
# from scipy.stats import gaussian_kde

# # Bandwidth parameter (adjust this)
# bandwidth = 0.1

# def plot_kde(weights, name):
#     plt.figure(figsize=(8, 4))
#     plt.title(f'{name} - Epoch 1 - 10')

#     for epoch in range(num_epochs):
#         # Create a KDE object with Gaussian kernel
#         kde = gaussian_kde(weights[epoch][0][0], bw_method=bandwidth)

#         # Define a range of x values for the PDF
#         x_values = np.linspace(min(weights[epoch][0][0]), max(weights[epoch][0][0]), 1000)

#         # Calculate the estimated PDF at each x value
#         pdf_values = kde(x_values)

#         # Plot the KDE estimate
#         plt.plot(x_values, pdf_values, label=f'Epoch {epoch + 1}')
#     # plt.title(f'KDE with Gaussian Kernel - Epoch {epoch + 1}')
#     plt.xlabel('Weight')
#     plt.ylabel('Estimated Density')
#     plt.legend()
#     plt.show()

In [None]:
# # Loop through weight distributions for each epoch
# for idx, (name, param) in enumerate(model.named_parameters()):
#   if 'weight' in name:
#       plot_kde([weight_history[i][idx] for i in range(num_epochs)], name)

In [None]:
# # Loop through weight distributions for each epoch
# for idx, (name, param) in enumerate(model.named_parameters()):
#   if 'weight' in name:
#       plot_kde([weight_history2[i][idx] for i in range(num_epochs)], name)

In [None]:
# # Loop through weight distributions for each epoch
# for idx, (name, param) in enumerate(model.named_parameters()):
#   if 'weight' in name:
#       plot_kde([weight_history3[i][idx] for i in range(num_epochs)], name)

In [None]:
# # Loop through weight distributions for each epoch
# for idx, (name, param) in enumerate(model.named_parameters()):
#   if 'weight' in name:
#       plot_kde([weight_history4[i][idx] for i in range(num_epochs)], name)

In [None]:
# def plot_kde2(weights1, weights2, epoch):
#     for idx, (name, param) in enumerate(model.named_parameters()):
#       if 'weight' in name:
#           plt.figure(figsize=(8, 4))
#           plt.title(f'Epoch {epoch} - {name}')
#           print(len(weights1))
#           kde1 = gaussian_kde(weights1[idx][0][0], bw_method=bandwidth)
#           kde2 = gaussian_kde(weights2[idx][0][0], bw_method=bandwidth)

#           x_values = np.linspace(min(min(weights1[idx][0][0]), min(weights2[idx][0][0])),
#                                 max(max(weights1[idx][0][0]), max(weights2[idx][0][0])), 1000)

#           pdf_values1 = kde1(x_values)
#           pdf_values2 = kde2(x_values)

#           plt.plot(x_values, pdf_values1, label=f'Without Dropout', linestyle='-')
#           plt.plot(x_values, pdf_values2, label=f'With Dropout', linestyle='--')

#           plt.xlabel('Weight')
#           plt.ylabel('Estimated Density')
#           plt.legend()
#           plt.show()

# for epoch in range(num_epochs):
#   plot_kde2(weight_history[epoch], weight_history3[epoch], epoch)


In [None]:
import json
import numpy as np
import torch

def convert_to_json_serializable(obj):
    """Recursively convert ``obj`` into plain Python values that ``json`` can encode.

    Handles torch tensors, NumPy arrays and NumPy scalars (converted to nested
    lists / Python numbers), lists and tuples (converted to lists), dicts
    (values converted, keys kept as they are) and the JSON primitives
    ``None``, ``bool``, ``int``, ``float`` and ``str``.

    Returns:
        The converted structure. Values of any other type are replaced by
        ``None`` (best-effort behaviour kept from the original helper).
    """
    if isinstance(obj, torch.Tensor):
        # detach + cpu so tensors that require grad or live on a GPU convert too
        return obj.detach().cpu().tolist()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # NumPy scalars such as np.float32 / np.int64 are not Python numbers;
        # unwrap them instead of letting them fall through to None.
        value = obj.item()
        return value if isinstance(value, (bool, int, float, str)) else None
    if obj is None or isinstance(obj, (bool, int, float, str)):
        return obj
    if isinstance(obj, (list, tuple)):
        # Tuples are encoded as JSON arrays, the same as lists
        return [convert_to_json_serializable(item) for item in obj]
    if isinstance(obj, dict):
        return {key: convert_to_json_serializable(value) for key, value in obj.items()}
    return None  # Unsupported type: encoded as JSON null

# Turn the tensors stored in weight_history3 into nested Python lists and
# encode the result as a JSON string.
# NOTE: weight_history3 is rebound for every experiment of the training loop,
# so this exports the last run only; weight_distribution3 holds all runs.
serializable_weight_history = convert_to_json_serializable(weight_history3)
json_data = json.dumps(serializable_weight_history)

# Write the JSON text to a file in the working directory
weights_json_name = "0.5_weight_history_dropout.json"
with open(weights_json_name, "w") as json_file:
    json_file.write(json_data)

# Hand the saved file to the browser for download (Google Colab only)
from google.colab import files
files.download(weights_json_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Turn the tensors stored in bias_history3 into nested Python lists and encode
# the result as a JSON string.
# NOTE: despite its name, serializable_weight_history holds the *bias* history
# from here on; the name is reused from the weight export.
serializable_weight_history = convert_to_json_serializable(bias_history3)
json_data = json.dumps(serializable_weight_history)

# Write the JSON text to a file in the working directory
bias_json_name = "0.5_bias_history_dropout.json"
with open(bias_json_name, "w") as json_file:
    json_file.write(json_data)

# Hand the saved file to the browser for download (Google Colab only)
from google.colab import files
files.download(bias_json_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>