In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision.transforms import Compose, ToTensor, Normalize
import numpy as np
import seaborn as sn
from random import choice

In [2]:
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(root ='./files', train=True, download = True, 
         transform = Compose([
             ToTensor(),
             Normalize(
                 (0.1307,), (0.3081,))
         ])),
    batch_size = 1, shuffle = True)

In [5]:
examples = enumerate(train_loader)
data = []
for batch_idx, (in_features, out_features) in examples:
    zero = np.zeros(10)
    zero[out_features.numpy()[0]] = 1
    data.append([in_features.numpy().reshape(-1), zero])

for i in range(len(data)):
    data[i] = [torch.Tensor(data[i][0]).to('cuda:2'), torch.Tensor(data[i][1]).to('cuda:2')]
    

In [6]:
dim_x, dim_y, dim_h, N = 28*28, 10, 10, 100

class Base(nn.Module):
    def __init__(self):
        super(Base, self).__init__()
        self.l1 = nn.Linear(dim_x, dim_h, bias = False)
        self.l2 = nn.Linear(dim_h, dim_y, bias = False)
    def forward(self, x):
        return (self.l2(nn.functional.softmax(self.l1(x))))
    
class Model(nn.Module):
    def __init__(self, N):
        super(Model, self).__init__()
        self.layers = nn.ModuleList([Base() for i in range(N)])
    def forward(self, x):
        s = 0 
        for net in self.layers:
            s += net(x)
        return s/len(self.layers)

In [None]:

N_options = [10, 100, 1000, 10000]
t = 1

for n in N_options:
    N=n
    model = Model(N)
    model.to('cuda:2')
    criterion = nn.MSELoss().to('cuda:2')
    optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
    for epoch in range (t*N):
        data_point = choice(data)
        x = data_point[0]
        y = data_point[1]
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if epoch%1000 == 0:
            print(epoch, ":", loss)
            
    parameters = []
    j = 0
    for m in model.modules():
        if isinstance(m, nn.Linear):
            a = m.weight.reshape(-1)
            for i in range(len(a)):
                parameters.append(float(a[i].data))
            if j%1000 == 0:
                print(j)
            j+=1
    
    ax = sn.displot(parameters, stat = 'density', kde = False)
    hidden_layers = str(N)
    ax.savefig("Density: N=%s,t= 1.png" % hidden_layers)
    print("Hidden Layer N is completed.")

  return (self.l2(nn.functional.softmax(self.l1(x))))


0 : tensor(0.1053, device='cuda:2', grad_fn=<MseLossBackward0>)
0
Hidden Layer N is completed.
0 : tensor(0.0985, device='cuda:2', grad_fn=<MseLossBackward0>)


  return (self.l2(nn.functional.softmax(self.l1(x))))


0
Hidden Layer N is completed.


  return (self.l2(nn.functional.softmax(self.l1(x))))


0 : tensor(0.0995, device='cuda:2', grad_fn=<MseLossBackward0>)
