In [None]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split

In [None]:
dataset = CIFAR10(root='data/', download=True, transform=ToTensor())
test_dataset = CIFAR10(root='data/', train=False, transform=ToTensor())

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/cifar-10-python.tar.gz to data/


In [None]:
torch.manual_seed(43)
val_size = 5000
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])
batch_size=125
len(train_ds), len(val_ds)  
len(test_dataset)

10000

In [None]:
train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size, num_workers=4, pin_memory=True)

  cpuset_checked))


In [None]:
input_size = 3*32*32
output_size = 10

In [None]:
"""VGG(
  (convs): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): ReLU()
    (15): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (16): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (18): ReLU()
    (19): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (22): ReLU()
    (23): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (26): ReLU()
    (27): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (28): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (30): ReLU()
    (31): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (32): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (33): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (34): ReLU()
    (35): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (36): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (37): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (38): ReLU()
    (39): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (40): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (42): ReLU()
    (43): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (44): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (45): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (46): ReLU()
    (47): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (48): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (49): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (50): ReLU()
    (51): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (fcs): Sequential(
    (0): Linear(in_features=512, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)
"""




class VGGNEW(nn.Module):
    def __init__(self):
        super().__init__()
        self.C11= nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.B11=nn.BatchNorm2d(64)
        self.M11=nn.MaxPool2d(2,2)
        

        self.C12=nn.Conv2d(64, 64, kernel_size=(3, 3),padding=1)
        self.B12=nn.BatchNorm2d(64)
        self.M12=nn.MaxPool2d(2,2)
        

        self.C21=nn.Conv2d(64, 128,kernel_size=(3, 3), padding=(1, 1))
        self.B21=nn.BatchNorm2d(128)
        self.M21=nn.MaxPool2d(2,2,padding=1)

        self.C22=nn.Conv2d(128, 128, kernel_size=(3, 3), padding=(1, 1))
        self.B22=nn.BatchNorm2d(128)
        self.M22=nn.MaxPool2d(2,2,padding=1)

        self.C31=nn.Conv2d(128, 256, kernel_size=(3, 3), padding=(1, 1))
        self.B31=nn.BatchNorm2d(256)
        self.M31=nn.MaxPool2d(2,2,padding=1)

        self.C32=nn.Conv2d(256, 256, kernel_size=(3, 3),  padding=(1, 1))
        self.B32=nn.BatchNorm2d(256)
        self.M32=nn.MaxPool2d(2,2,padding=1)

        self.C33=nn.Conv2d(256, 256, kernel_size=(3, 3),  padding=(1, 1))
        self.B33=nn.BatchNorm2d(256)
        self.M33=nn.MaxPool2d(2,2,padding=1)

        self.C41=nn.Conv2d(256, 512, kernel_size=(3, 3),  padding=(1, 1))
        self.B41=nn.BatchNorm2d(512)
        self.M41=nn.MaxPool2d(2,2,padding=1)

        self.C42=nn.Conv2d(512, 512, kernel_size=(3, 3),  padding=(1, 1))
        self.B42=nn.BatchNorm2d(512)
        self.M42=nn.MaxPool2d(2,2,padding=1)

        self.C43=nn.Conv2d(512, 512, kernel_size=(3, 3),  padding=(1, 1))
        self.B43=nn.BatchNorm2d(512)
        self.M43=nn.MaxPool2d(2,2,padding=1)

        self.C51=nn.Conv2d(512, 512, kernel_size=(3, 3), padding=(1, 1))
        self.B51=nn.BatchNorm2d(512)
        self.M51=nn.MaxPool2d(2,2,padding=1)

        self.C52=nn.Conv2d(512, 512, kernel_size=(3, 3),  padding=(1, 1))
        self.B52=nn.BatchNorm2d(512)
        self.M52=nn.MaxPool2d(2,2,padding=1)

        self.C53=nn.Conv2d(512, 512, kernel_size=(3, 3), padding=(1, 1))
        self.B53=nn.BatchNorm2d(512)
        self.M53=nn.MaxPool2d(2,2,padding=1)

        self.FF=nn.Flatten()
        self.L1=nn.Linear(in_features=2048, out_features=4096, bias=True)
        self.L2=nn.Linear(in_features=4096, out_features=4096, bias=True)
        self.L3=nn.Linear(in_features=4096, out_features=10, bias=True)

        
    def forward(self, xb):
        #out = xb.view(xb.size(0), -1)
        self.G=[]
        SIGMOID=torch.nn.Sigmoid()
        beta=10

        out = self.C11(xb)
        out=self.B11(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M11(out)
        
        out = self.C12(out)
        out=self.B12(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M12(out)
        
        out = self.C21(out)
        out=self.B21(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M21(out)
        
        out = self.C22(out)
        out=self.B22(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M22(out)
        
        out = self.C31(out)
        out=self.B31(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M31(out)
        
        out = self.C32(out)
        out=self.B32(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M32(out)
        
        out = self.C33(out)
        out=self.B33(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M33(out)
        
        out = self.C41(out)
        out=self.B41(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M41(out)
        
        out = self.C42(out)
        out=self.B42(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M42(out)
        
        out = self.C43(out)
        out=self.B43(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M43(out)
        
        out = self.C51(out)
        out=self.B51(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M51(out)
        
        out = self.C52(out)
        out=self.B52(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M52(out)

        out = self.C53(out)
        out=self.B53(out)
        self.G.append(SIGMOID(beta*out))
        out = self.M53(out)

        out=self.FF(out)
        out=self.L1(out)
        self.G.append(SIGMOID(beta*out))
        out=self.L2(out)
        self.G.append(SIGMOID(beta*out))
        out = self.L3(out)
        return out
    def gate(self):
      return self.G

In [None]:
Model=VGGNEW()
Model

VGGNEW(
  (C11): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (B11): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (M11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (C12): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (B12): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (M12): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (C21): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (B21): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (M21): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (C22): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (B22): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (M22): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=Fal

In [None]:
criterion = nn.CrossEntropyLoss()
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(),lr)
    for epoch in range(epochs): 
        for batch,labels in train_loader:
            out = model.forward(batch)
            loss = criterion(out,labels)
            loss.backward()
            optimizer.step()
            
            optimizer.zero_grad()
        
        if (epoch+1) % 1 == 0:
          print (f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

In [None]:
  fit(5,0.001,Model,train_loader,val_loader)

  cpuset_checked))


Epoch [1/5], Loss: 2.0483
Epoch [2/5], Loss: 1.7095
Epoch [3/5], Loss: 1.5739
Epoch [4/5], Loss: 1.3822
Epoch [5/5], Loss: 1.3403


In [None]:
#Testing the model 
n_correct = 0
n_samples = 0
for images, labels in test_loader:
  labels = labels
  outputs = Model.forward(images)
  # max returns (value ,index)
  _, predicted = torch.max(outputs.data, 1)
  n_samples += labels.size(0)
  n_correct += (predicted == labels).sum().item()  
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on the 10000 test images: {acc} %')

  cpuset_checked))


Accuracy of the network on the 10000 test images: 52.96 %


In [None]:




class VGGWeight(nn.Module):
    def __init__(self):
        super().__init__()
        self.C11= nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.B11=nn.BatchNorm2d(64)
        self.M11=nn.MaxPool2d(2,2)
        

        self.C12=nn.Conv2d(64, 64, kernel_size=(3, 3),padding=1)
        self.B12=nn.BatchNorm2d(64)
        self.M12=nn.MaxPool2d(2,2)
        

        self.C21=nn.Conv2d(64, 128,kernel_size=(3, 3), padding=(1, 1))
        self.B21=nn.BatchNorm2d(128)
        self.M21=nn.MaxPool2d(2,2,padding=1)

        self.C22=nn.Conv2d(128, 128, kernel_size=(3, 3), padding=(1, 1))
        self.B22=nn.BatchNorm2d(128)
        self.M22=nn.MaxPool2d(2,2,padding=1)

        self.C31=nn.Conv2d(128, 256, kernel_size=(3, 3), padding=(1, 1))
        self.B31=nn.BatchNorm2d(256)
        self.M31=nn.MaxPool2d(2,2,padding=1)

        self.C32=nn.Conv2d(256, 256, kernel_size=(3, 3),  padding=(1, 1))
        self.B32=nn.BatchNorm2d(256)
        self.M32=nn.MaxPool2d(2,2,padding=1)

        self.C33=nn.Conv2d(256, 256, kernel_size=(3, 3),  padding=(1, 1))
        self.B33=nn.BatchNorm2d(256)
        self.M33=nn.MaxPool2d(2,2,padding=1)

        self.C41=nn.Conv2d(256, 512, kernel_size=(3, 3),  padding=(1, 1))
        self.B41=nn.BatchNorm2d(512)
        self.M41=nn.MaxPool2d(2,2,padding=1)

        self.C42=nn.Conv2d(512, 512, kernel_size=(3, 3),  padding=(1, 1))
        self.B42=nn.BatchNorm2d(512)
        self.M42=nn.MaxPool2d(2,2,padding=1)

        self.C43=nn.Conv2d(512, 512, kernel_size=(3, 3),  padding=(1, 1))
        self.B43=nn.BatchNorm2d(512)
        self.M43=nn.MaxPool2d(2,2,padding=1)

        self.C51=nn.Conv2d(512, 512, kernel_size=(3, 3), padding=(1, 1))
        self.B51=nn.BatchNorm2d(512)
        self.M51=nn.MaxPool2d(2,2,padding=1)

        self.C52=nn.Conv2d(512, 512, kernel_size=(3, 3),  padding=(1, 1))
        self.B52=nn.BatchNorm2d(512)
        self.M52=nn.MaxPool2d(2,2,padding=1)

        self.C53=nn.Conv2d(512, 512, kernel_size=(3, 3), padding=(1, 1))
        self.B53=nn.BatchNorm2d(512)
        self.M53=nn.MaxPool2d(2,2,padding=1)

        self.FF=nn.Flatten()
        self.L1=nn.Linear(in_features=2048, out_features=4096, bias=True)
        self.D1=nn.Dropout(p=0.5, inplace=False)
        self.L2=nn.Linear(in_features=4096, out_features=4096, bias=True)
        self.D2=nn.Dropout(p=0.5, inplace=False)
        self.L3=nn.Linear(in_features=4096, out_features=10, bias=True)

        
    def forward(self, xb,G):
        #out = xb.view(xb.size(0), -1)
        SIGMOID=torch.nn.Sigmoid()
        beta=10
        i=0
        out = self.C11(xb)
        out=self.B11(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M11(out)
        
        
        out = self.C12(out)
        out=self.B12(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M12(out)
        
        out = self.C21(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M21(out)
        print(out.shape)
        
        out = self.C22(out)
        out=self.B22(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M22(out)
        print(out.shape)
        
        out = self.C31(out)
        out=self.B31(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M31(out)
        
        out = self.C32(out)
        out=self.B32(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M32(out)
        
        out = self.C33(out)
        out=self.B33(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M33(out)
        
        out = self.C41(out)
        out=self.B41(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M41(out)
        
        out = self.C42(out)
        out=self.B42(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M42(out)
        
        out = self.C43(out)
        out=self.B43(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M43(out)
        
        out = self.C51(out)
        out=self.B51(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M51(out)
        
        out = self.C52(out)
        out=self.B52(out)
        out = out*G[i].detach()
        i=i+1
        out = self.M52(out)

        out = self.C53(out)
        out=self.B53(out)
        print(i)
        out = out*G[i].detach()
        i=i+1
        out = self.M53(out)

        out=self.FF(out)
        out=self.L1(out)
        out = out*G[i].detach();
        i=i+1
        out=self.D1(out)
        out=self.L2(out)
        out = out*G[i].detach();
        out=self.D2(out)
        out = self.L3(out)
        return out

In [None]:
Modelw=VGGWeight()
Modelw

VGGWeight(
  (C11): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (B11): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (M11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (C12): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (B12): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (M12): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (C21): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (B21): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (M21): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  (C22): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (B22): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (M22): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=

In [None]:
#print(Model.gate())
j=0
for i in enumerate(Model.gate()):
  j=j+1
Model.gate()[0].shape


torch.Size([125, 64, 32, 32])

In [None]:
criterion = nn.CrossEntropyLoss()
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(),lr)
    for epoch in range(epochs): 
        for image,labels in train_loader:
            All_ones=torch.ones(image.shape)
            out = model.forward(All_ones,Model.gate())
            loss = criterion(out,labels)
            loss.backward()
            optimizer.step()
            
            optimizer.zero_grad()
        
        if (epoch+1) % 1 == 0:
          print (f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

In [None]:
fit(5,0.001,Modelw,train_loader,val_loader) 

  cpuset_checked))


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128

In [None]:
#Testing the model 
n_correct = 0
n_samples = 0
for images, labels in test_loader:
  labels = labels
  outputs = Modelw.forward(images,Model.gate())
  # max returns (value ,index)
  _, predicted = torch.max(outputs.data, 1)
  n_samples += labels.size(0)
  n_correct += (predicted == labels).sum().item()  
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on the 10000 test images: {acc} %')

  cpuset_checked))


torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5, 5])
torch.Size([125, 128, 3, 3])
12
torch.Size([125, 128, 5,