In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

from recnet import *

In [2]:
# Channel-wise normalisation applied after the image has been converted to a tensor.
# (The mean/std values are presumably CIFAR-100 statistics — TODO confirm.)
normalize = transforms.Normalize(
    mean=[0.507, 0.487, 0.441],
    std=[0.267, 0.256, 0.276],
)

# Training-time pipeline: random 32x32 crop after 4 px of padding, random
# horizontal flip, tensor conversion, then normalisation.
train_transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]
)

# CIFAR-100 training split stored under ../data (downloaded when missing).
train_dataset = torchvision.datasets.CIFAR100(
    root='../data',
    train=True,
    download=True,
    transform=train_transform,
)

Files already downloaded and verified


In [3]:
# Shuffled mini-batches of 128 samples drawn from train_dataset.
trainloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
)

In [3]:
# Instantiate the model through the recnet_affine_modular factory (not defined in
# the visible cells; presumably provided by `from recnet import *`), requesting
# 100 output classes and gruLoss=False (semantics of gruLoss live in recnet).
affine = recnet_affine_modular(num_classes=100, gruLoss=False)

In [4]:
# Show the module repr to inspect the layer structure.
affine

RecNetAffineModular(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (alphas): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 32]
      (1): Parameter containing: [torch.FloatTensor of size 64]
      (2): Parameter containing: [torch.FloatTensor of size 128]
    
  )
  (betas): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 32]
      (1): Parameter containing: [torch.FloatTensor of size 64]
      (2): Parameter containing: [torch.FloatTensor of size 128]
    
  )
  (relu): ReLU(inplace)
  (layer1): Sequential(
    (0): RecNetBlockAffineModular(
      (module1): RecNetAffineModule(
        (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (gru): GRU(32, 32)
        (relu): ReLU()
      )
      (module2): RecNetAffineModule(
        (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (gru): GRU(32, 32)
    

In [None]:
# Object identity of the second entry of `convs` in layer3, block 1.
# NOTE(review): `model` is not assigned in any visible cell and this cell has no
# execution count — it would raise NameError on a fresh kernel unless defined elsewhere.
id(model.layer3[1].convs[1])

In [None]:
# Object identity of the second entry of `batchNorms` in layer3, block 1.
# NOTE(review): `sharedBNmodel` is not assigned in any visible cell — confirm where
# it comes from, or remove this scratch cell.
id(sharedBNmodel.layer3[1].batchNorms[1])

In [None]:
# Build a RecNet from RecNetBlock with [9,9,9] as the second argument
# (presumably blocks per stage — TODO confirm against recnet) and 100 classes.
recnet=RecNet(RecNetBlock, [9,9,9], num_classes=100)

In [None]:
# Instantiate a model via resnet56_cifar(), which is defined outside the visible cells.
resnet=resnet56_cifar()

In [5]:
# First element of the first sample of train_dataset (the transformed image).
# NOTE(review): this rebinds the builtin name `input` for the rest of the session.
input = train_dataset[0][0]

In [6]:
 # Add a leading batch axis with unsqueeze(0) and run a forward pass through `affine`.
 affine(input.unsqueeze(0))

(tensor([[ 0.1005, -0.0909,  0.2760,  0.3906,  0.0249, -0.1711,  0.4473,
          -0.4553, -0.1480, -0.3758,  0.2406, -0.1202,  0.0081, -0.0532,
          -0.1601,  0.1584, -0.3024,  0.0551, -0.4280,  0.1602,  0.1727,
          -0.2168,  0.0022,  0.1946, -0.4382,  0.0049, -0.0668, -0.1314,
           0.4382, -0.1078, -0.3650,  0.0163, -0.0991, -0.2412, -0.2445,
          -0.0076,  0.2111,  0.0339,  0.0916, -0.1264,  0.1627, -0.2815,
          -0.0109, -0.0517, -0.0459,  0.2965, -0.2492,  0.3736,  0.2387,
          -0.3886, -0.1718,  0.2012,  0.1049,  0.0306,  0.1403, -0.1965,
          -0.4781,  0.0751,  0.3424,  0.2202, -0.2753,  0.0275,  0.2625,
          -0.3399,  0.1416,  0.0384,  0.1150, -0.2118, -0.0841,  0.1566,
           0.0780,  0.0335,  0.3105, -0.1111, -0.0695, -0.3465, -0.1091,
           0.4439,  0.0387, -0.1136, -0.1926, -0.3487,  0.0172, -0.0396,
           0.0154,  0.3084,  0.1881, -0.2998,  0.3445,  0.2936,  0.0189,
           0.0482, -0.2344, -0.0949,  0.1845,  0.13

In [6]:
# Show the module repr again (same expression as the earlier `affine` display cell).
affine

RecNetAffineModular(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (alphas): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 32]
      (1): Parameter containing: [torch.FloatTensor of size 64]
      (2): Parameter containing: [torch.FloatTensor of size 128]
    
  )
  (betas): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 32]
      (1): Parameter containing: [torch.FloatTensor of size 64]
      (2): Parameter containing: [torch.FloatTensor of size 128]
    
  )
  (relu): ReLU(inplace)
  (layer1): Sequential(
    (0): RecNetBlockAffineModular(
      (module1): RecNetAffineModule(
        (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (gru): GRU(32, 32)
        (relu): ReLU()
      )
      (module2): RecNetAffineModule(
        (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (gru): GRU(32, 32)
    

In [None]:
# Object identity of the first entry of `convs` in layer1, block 4.
# NOTE(review): `model` is not assigned in any visible cell — this fails on a fresh
# kernel unless it is defined elsewhere.
id(model.layer1[4].convs[0])

In [None]:
# Object identity of the first entry of `convs` in layer1, block 5 (presumably
# compared by eye with block 4 to check whether the conv is shared — TODO confirm).
# NOTE(review): `model` is not assigned in any visible cell.
id(model.layer1[5].convs[0])

In [None]:
        # 3 x 9 nested Python lists; every leaf is an nn.ModuleList holding two
        # independent BatchNorm2d(16) layers.
        BatchNormList = [
            [
                nn.ModuleList([nn.BatchNorm2d(16) for _ in range(2)])
                for _ in range(9)
            ]
            for _ in range(3)
        ]


In [None]:
# Object identity of the `batchNorms` container in layer2, block 2.
# NOTE(review): `model` is not assigned in any visible cell.
id(model.layer2[2].batchNorms)

In [None]:
# Object identity of `conv1` in layer2, block 2 of `resnet`.
# NOTE(review): `resnet` is only assigned in a cell that has no execution count.
id(resnet.layer2[2].conv1)

In [None]:
# Display `model`.
# NOTE(review): `model` is not assigned in any visible cell — confirm its origin.
model

In [None]:
# Build a RecNet from RecNetBlock_postrelu with [9,9,9] as the second argument
# (presumably blocks per stage — TODO confirm) and 100 classes.
# NOTE(review): the variable is named `model_prelu` while the block class is
# `..._postrelu` — confirm which name reflects the intent.
model_prelu= RecNet(RecNetBlock_postrelu, [9,9,9], num_classes=100)

In [None]:
# Display the module repr of model_prelu.
model_prelu

In [None]:
# Forward pass through model_prelu on `input` with a leading batch axis added
# by unsqueeze(0).
model_prelu(input.unsqueeze(0))

In [None]:
# Object identity of the `batchNorms` container in layer1, block 3.
# NOTE(review): `sharedBNmodel` is not assigned in any visible cell.
id(sharedBNmodel.layer1[3].batchNorms)

In [None]:
# Walk whatever affine.AffineIterator() yields and show each item.
# NOTE(review): the original cell was an unfinished `for` header (stray trailing
# `L`, no colon, no body) and could not be parsed; printing each item is a
# placeholder body — replace it with the intended per-item work.
for param in affine.AffineIterator():
    print(param)

In [None]:
# Scratch check of list membership; this expression evaluates to True.
'linearA' in ['linearA']

In [6]:
# Standard-normal sample with shape (128, 64, 8, 8).
out = torch.randn((128, 64, 8, 8))

In [7]:
# Sum over dim 3, then dim 2, then average the result over dim 0.
out.sum(dim=3).sum(dim=2).mean(dim=0)

tensor([ 0.2868,  0.2680,  0.1610, -0.0926, -0.2603, -0.5853, -0.1729,
         0.5398,  0.5768, -0.3498, -0.2608, -0.5950,  0.3772,  0.8407,
         1.2609,  0.3743,  0.0151, -0.4482,  0.0730, -0.1091, -0.6373,
        -0.1905,  0.0080, -0.2198,  0.6910,  0.0511,  0.2156,  0.2679,
        -0.0432,  0.2368, -0.1560,  0.2730, -1.2759, -0.5382,  0.2982,
         0.5298, -0.5716, -1.5716, -0.8164, -0.6547,  0.5809, -0.2252,
        -0.4238,  0.6382,  1.0932, -1.0645,  0.5984, -0.6608,  0.6287,
         0.2035, -1.0913, -0.5186, -0.1308, -0.0293, -0.7256,  0.8981,
         2.0408,  0.2094, -0.1322,  0.3747, -1.0197,  0.1687, -0.0536,
        -0.7615])

In [8]:
# Average over dim 3, then dim 2, then dim 0 — one value per index of dim 1.
out.mean(dim=3).mean(dim=2).mean(dim=0)

tensor(1.00000e-02 *
       [ 0.4482,  0.4188,  0.2516, -0.1446, -0.4067, -0.9146, -0.2701,
         0.8434,  0.9012, -0.5465, -0.4075, -0.9297,  0.5894,  1.3136,
         1.9701,  0.5849,  0.0236, -0.7002,  0.1140, -0.1705, -0.9957,
        -0.2976,  0.0125, -0.3434,  1.0797,  0.0799,  0.3369,  0.4186,
        -0.0675,  0.3700, -0.2437,  0.4266, -1.9936, -0.8409,  0.4660,
         0.8279, -0.8932, -2.4557, -1.2756, -1.0229,  0.9076, -0.3518,
        -0.6623,  0.9971,  1.7081, -1.6633,  0.9350, -1.0324,  0.9824,
         0.3180, -1.7051, -0.8103, -0.2043, -0.0458, -1.1337,  1.4033,
         3.1887,  0.3272, -0.2065,  0.5854, -1.5933,  0.2636, -0.0837,
        -1.1898])

In [25]:
# Fresh standard-normal sample; rebinds `out` to a (64, 128, 8, 8) tensor.
out = torch.randn((64, 128, 8, 8))

In [19]:
# For the first item along dim 0: sum over its dim 2, then dim 1, and average
# the remaining values into a scalar.
out[0].sum(dim=2).sum(dim=1).mean()

tensor(-2.0809)

In [66]:
# Cross-entropy loss with PyTorch's default settings.
criterion = nn.CrossEntropyLoss()

In [38]:
# Mean of the [0, 0] slice of out2.
# NOTE(review): out2 is assigned in the cell *below* this one — the notebook only
# works in its recorded (out-of-order) execution sequence.
out2[0, 0].mean()

tensor(1.00000e-02 *
       6.9180)

In [23]:
# View/copy of `out` laid out as (64, 128, 8, 8).
out2 = torch.reshape(out, (64, 128, 8, 8))

In [39]:
# Empty accumulator list (nothing in the visible cells appends to it).
outmeans1 = list()

In [8]:
# Standard-normal tensor of shape (1, 16, 32, 32).
# NOTE(review): rebinds `input` (shadowing the builtin and replacing the dataset
# sample assigned to the same name earlier in the notebook).
input = torch.randn((1, 16, 32, 32))

In [9]:
# Bias-free 1x1 convolution mapping 16 input channels to 32 output channels.
residualConv = nn.Conv2d(
    in_channels=16,
    out_channels=32,
    kernel_size=1,
    stride=1,
    bias=False,
)

In [10]:
# Apply residualConv to `input` and keep the result for inspection.
testres = residualConv(input)

In [11]:
# Display the tensor produced by residualConv(input).
testres

tensor([[[[-5.1346e-01,  6.8032e-01, -3.1067e-01,  ...,  4.5443e-01,
           -4.4668e-01,  8.0733e-01],
          [-8.7927e-02,  5.5278e-01, -8.9653e-01,  ..., -5.8630e-01,
           -5.3038e-01,  2.2492e-01],
          [-7.6860e-01,  1.2568e-01, -8.5487e-01,  ..., -3.6219e-01,
           -9.7260e-02,  5.5858e-02],
          ...,
          [-5.3793e-01, -5.2672e-01,  1.3595e+00,  ..., -7.7076e-01,
           -2.8743e-01, -3.7667e-01],
          [ 1.6436e-01,  8.9822e-01, -5.6351e-01,  ..., -2.3687e-01,
            5.1513e-01,  2.0109e-01],
          [ 3.9716e-01, -4.1295e-01, -1.9378e-01,  ...,  3.4838e-01,
            7.1605e-01, -7.2828e-01]],

         [[ 5.6272e-01, -1.5440e-01, -7.9834e-01,  ..., -4.5950e-01,
           -9.1085e-01, -9.8680e-02],
          [-4.2512e-01, -1.4251e+00,  8.6418e-02,  ..., -6.1087e-01,
           -1.4451e-01, -4.2384e-01],
          [-1.4771e-01, -1.7635e-01, -2.0220e-01,  ..., -3.2012e-01,
           -8.7224e-01, -7.5484e-01],
          ...,
     