### Imports

In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
import os
import sys

In [3]:
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from exp.nb_02 import *

### 02a_why_sqrt5

In [4]:
def get_data():
    path = datasets.download_data(MNIST_URL, ext='.gz')
    with gzip.open(path, 'rb') as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
    return map(tensor, (x_train,y_train,x_valid,y_valid))

def normalize(x, m, s): return (x-m)/s

In [5]:
x_train,y_train,x_valid,y_valid = get_data()
train_mean,train_std = x_train.mean(),x_train.std()
x_train = normalize(x_train, train_mean, train_std)
x_valid = normalize(x_valid, train_mean, train_std)

x_train = x_train.view(-1,1,28,28)
x_valid = x_valid.view(-1,1,28,28)
x_train.shape,x_valid.shape

(torch.Size([50000, 1, 28, 28]), torch.Size([10000, 1, 28, 28]))

In [6]:
def stats(x): return x.mean(), x.std()

In [7]:
stats(x_train), stats(x_valid)

((tensor(-6.2598e-06), tensor(1.)), (tensor(-0.0059), tensor(0.9924)))

In [16]:
n, c, *_ = x_train.shape
print(n, c)

l1 = nn.Conv2d(in_channels=c, out_channels=32, kernel_size=5)

50000 1


In [406]:
import torch.nn.functional as F
def f1(x, a=0): return F.leaky_relu(l1(x), a)
def f2(x, a=0): return F.leaky_relu(l1(x), a)-0.5

In [142]:
# stats(l1.weight), stats(l1.bias)

In [409]:
a1 = l1(x_train[:1000])
a2 = f1(x_train[:1000])
a3 = f2(x_train[:1000])
stats(a1), stats(a2), stats(a3)

((tensor(0.0930, grad_fn=<MeanBackward1>),
  tensor(1.6379, grad_fn=<StdBackward0>)),
 (tensor(0.5932, grad_fn=<MeanBackward1>),
  tensor(1.1778, grad_fn=<StdBackward0>)),
 (tensor(0.0932, grad_fn=<MeanBackward1>),
  tensor(1.1778, grad_fn=<StdBackward0>)))

In [19]:
l1.weight.shape, l1.bias.shape

(torch.Size([32, 1, 5, 5]), torch.Size([32]))

In [121]:
nn.modules.conv._ConvNd.reset_parameters??
# nn.Conv2d??

In [21]:
init??

In [22]:
l1.weight.shape

torch.Size([32, 1, 5, 5])

In [20]:
nf, ni, *_ = l1.weight.shape
fan_in, fan_out = l1.weight[0][0].numel()*ni, l1.weight[0][0].numel()*nf 
fan_in, fan_out

(25, 800)

In [23]:
def gain(a=0): return math.sqrt(2./(1+a**2))

In [24]:
gain(0), gain(1.), gain(math.sqrt(5.)), gain(0.01)

(1.4142135623730951, 1.0, 0.5773502691896257, 1.4141428569978354)

In [25]:
torch.zeros(10000).uniform_(-1,1).std()
1/math.sqrt(3)

0.5773502691896258

In [410]:
def init_kaiming_unif(w, a=0):
    g = gain(a)
    nf, ni, *_ = w.shape
    fan_in, fan_out = w[0][0].numel()*ni, w[0][0].numel()*nf 
    std = g*math.sqrt(3.)/math.sqrt(fan_in)
    with torch.no_grad():
        w.data.uniform_(-std, std)

In [411]:
l1 = nn.Conv2d(in_channels=c, out_channels=32, kernel_size=5)
init_kaiming_unif(l1.weight, a=1)
print(stats(l1(x_train[:10000])))
init_kaiming_unif(l1.weight, a=math.sqrt(5))
print(stats(l1(x_train[:10000])))

(tensor(0.0479, grad_fn=<MeanBackward1>), tensor(1.1573, grad_fn=<StdBackward0>))
(tensor(0.0357, grad_fn=<MeanBackward1>), tensor(0.6040, grad_fn=<StdBackward0>))


In [415]:
l1 = nn.Conv2d(in_channels=c, out_channels=32, kernel_size=5)
for a in [0, math.sqrt(5), 1, 0.01]:
    init_kaiming_unif(l1.weight, a)
    print(stats(F.leaky_relu(l1(x_train[:1000]), a)))
    print(stats(f2(x_train[:1000], a)))

(tensor(0.4712, grad_fn=<MeanBackward1>), tensor(0.8679, grad_fn=<StdBackward0>))
(tensor(-0.0288, grad_fn=<MeanBackward1>), tensor(0.8679, grad_fn=<StdBackward0>))
(tensor(-0.2765, grad_fn=<MeanBackward1>), tensor(1.0588, grad_fn=<StdBackward0>))
(tensor(-0.7765, grad_fn=<MeanBackward1>), tensor(1.0588, grad_fn=<StdBackward0>))
(tensor(-0.0237, grad_fn=<MeanBackward1>), tensor(0.9902, grad_fn=<StdBackward0>))
(tensor(-0.5237, grad_fn=<MeanBackward1>), tensor(0.9902, grad_fn=<StdBackward0>))
(tensor(0.5179, grad_fn=<MeanBackward1>), tensor(1.0137, grad_fn=<StdBackward0>))
(tensor(0.0179, grad_fn=<MeanBackward1>), tensor(1.0137, grad_fn=<StdBackward0>))


#### Trying on a small convnet

In [226]:
class Flatten(nn.Module): 
    def forward(self, x): return x.view(-1)

In [229]:
a=0
m = nn.Sequential(nn.Conv2d(1, 8, 3, 1), nn.LeakyReLU(a),
                  nn.Conv2d(8, 16, 3, 1), nn.LeakyReLU(a),
                  nn.Conv2d(16, 32, 3, 1), nn.LeakyReLU(a),
                  nn.Conv2d(32, 1, 3, 1), nn.LeakyReLU(a),
                  nn.AdaptiveAvgPool2d(1),
                  Flatten()
             )

In [228]:
o = m(x_train[:1000])
stats(o)

(tensor(0.0552, grad_fn=<MeanBackward1>),
 tensor(0.0177, grad_fn=<StdBackward0>))

In [230]:
for l in m:
    if isinstance(l, nn.Conv2d):
        init_kaiming_unif(l.weight, a)

In [231]:
o = m(x_train[:1000])
stats(o)

(tensor(0.5145, grad_fn=<MeanBackward1>),
 tensor(0.1467, grad_fn=<StdBackward0>))

In [232]:
l = mse(o, y_train[:1000].squeeze().float())

In [233]:
l.backward()

In [236]:
stats(m[2].weight.grad)

(tensor(-0.0844), tensor(0.7058))

### 02b_initializing.ipynb

In [294]:
x = torch.randn(512)
w = torch.randn(512,512)/math.sqrt(512)
for i in range(200):    
    x = w@x

In [295]:
stats(x)

(tensor(-1.2225), tensor(35.7302))

In [402]:
mean, std = 0., 0.
n=1000
for i in range(n):
    x = torch.randn(32)
    w = torch.randn(100, 32)
    y = w@x
    mean += y.mean().item()
    std += (y**2).mean().item()
mean/n, std/n

(-0.016592334284090612, 32.016327174186706)

In [403]:
y.mean(), y.std()

(tensor(0.0883), tensor(4.8142))

In [325]:
w.mean(dim=1), w.std(dim=1)

(tensor([-0.1233,  0.0480,  0.0570, -0.0015,  0.0229, -0.0169, -0.0005,  0.0981,
         -0.0112,  0.0689,  0.0260, -0.1014,  0.0292,  0.0116, -0.0076,  0.0644,
         -0.0298,  0.0618,  0.0090,  0.0259, -0.0933, -0.0680, -0.0233,  0.0102,
         -0.0211, -0.0940, -0.0103,  0.0994,  0.0245,  0.0298, -0.1229, -0.0456,
         -0.0930, -0.0409, -0.0207, -0.0325,  0.0518, -0.0786, -0.0440,  0.0068,
         -0.0556, -0.0508,  0.1306, -0.0129, -0.0778,  0.1230,  0.0163,  0.1039,
          0.0104, -0.0281]),
 tensor([1.0739, 0.9665, 1.0041, 1.0122, 0.9644, 1.0310, 1.0226, 0.9106, 0.9967,
         0.9097, 0.9718, 0.9955, 1.0156, 1.0245, 1.0099, 1.0104, 1.0688, 0.9914,
         0.9191, 1.0269, 0.9857, 0.9528, 1.0002, 0.9658, 0.9792, 0.9547, 1.0082,
         1.0712, 1.0608, 1.0109, 1.0221, 0.9700, 0.9310, 0.9799, 0.9861, 0.9561,
         1.0112, 1.0467, 1.0595, 0.9972, 1.0612, 0.9885, 0.9632, 1.0269, 1.0358,
         1.0371, 1.0463, 1.0602, 1.0161, 0.9818]))

In [326]:
 w.std(dim=0)

tensor([0.9861, 0.9482, 0.7974, 0.9973, 1.0806, 0.9296, 1.1403, 0.9300, 1.0402,
        1.0443, 0.9706, 0.9451, 1.0016, 1.0114, 0.9424, 1.2161, 0.9011, 0.9269,
        1.0766, 1.0051, 0.9971, 0.9165, 1.0086, 0.9589, 0.9908, 0.9972, 1.1428,
        0.9858, 0.9135, 0.8405, 1.0912, 1.1305, 0.7531, 1.1185, 1.1186, 0.9244,
        0.9840, 1.0119, 0.8395, 1.0265, 0.8017, 1.2087, 1.0237, 0.9954, 1.0181,
        0.9400, 1.0371, 0.8747, 0.7978, 1.0548, 1.0232, 0.9359, 1.2695, 0.9617,
        1.0008, 0.9207, 0.9276, 1.1736, 0.9094, 0.9807, 1.0287, 0.9980, 1.0306,
        0.8410, 0.9552, 1.0514, 0.9007, 0.9864, 0.9331, 0.9430, 0.9479, 0.9631,
        0.8606, 1.1455, 1.0436, 1.0259, 1.0779, 1.0314, 0.9683, 1.0342, 1.0324,
        0.9930, 0.9335, 0.8747, 1.1453, 1.0166, 0.9305, 0.9945, 0.9811, 1.0576,
        0.9134, 0.9097, 0.8807, 0.9981, 1.0646, 0.9226, 0.8245, 1.0424, 1.1546,
        1.0204, 1.0111, 1.1093, 1.1674, 0.9387, 1.0724, 1.0099, 0.9880, 1.1482,
        0.7920, 0.9116, 1.0070, 1.0148, 

In [388]:
mean, std = 0., 0.
n=10
for i in range(n):
    x = torch.randn(1)
    w = torch.randn(10,1)
    y = w@x
    mean += y.mean().item()
    std += (y**2).mean().item()
#     print(x, w, y, y.mean().item(), (y**2).mean().item())
    print(y.mean().item(), (y**2).mean().item())
mean/n, std/n

0.047691911458969116 2.0805296897888184
-0.09473999589681625 0.10034643113613129
0.30879807472229004 0.3178930878639221
-0.00023695515119470656 1.6705532743799267e-06
0.2800590991973877 0.6824519038200378
0.4337385296821594 0.8597820997238159
-0.10465200245380402 0.18564598262310028
0.00699568260461092 0.00838479120284319
0.42169514298439026 2.1988871097564697
-0.3262069821357727 0.4019150137901306


(0.09731425050122197, 0.6835837780258543)

### 03_minibatch_training.ipynb

In [490]:
#export
from exp.nb_02 import *
import torch.nn.functional as F

In [511]:
x_train,y_train,x_valid,y_valid = get_data()
n, nc = x_train.shape
c = y_train.max()+1
nh = 50

In [683]:
class Model(nn.Module):
    def __init__(self, ni, nh, no):
        super().__init__()
        self.layers = [nn.Linear(ni, nh), nn.ReLU(), nn.Linear(nh, no)]
    
    def __call__(self, x):
        for i in self.layers:
            x = i(x)
        return x

In [692]:
m = Model(nc, nh, c.item())

###### Loss function

In [520]:
pred_train = m(x_train)

In [523]:
# Loss function - log_softmax, NLL

In [533]:
pred_train.exp().sum(dim=1).unsqueeze(1).shape

torch.Size([50000, 1])

In [538]:
def log_softmax(x): 
    return (x.exp()/x.exp().sum(dim=-1).unsqueeze(1)).log()

In [539]:
pt_ls = log_softmax(pred_train)

In [544]:
test_near(F.log_softmax(pred_train, dim=-1), pt_ls)

In [583]:
def NLL_loss(pred, tar):
    #     -tar*pred
    return -pred[np.arange(pred.shape[0]), tar].mean()

In [584]:
loss = NLL_loss(pt_ls, y_train)

In [585]:
loss

tensor(2.3048, grad_fn=<NegBackward>)

In [586]:
nn.NLLLoss()(pt_ls, y_train)

tensor(2.3048, grad_fn=<NllLossBackward>)

In [587]:
# can use logsumexp trick to compute denominator without any overflow

In [588]:
F.nll_loss(F.log_softmax(pred_train,dim=-1), y_train)

tensor(2.3048, grad_fn=<NllLossBackward>)

###### Training loop

In [685]:
loss_func = nn.CrossEntropyLoss()

x_train.shape
bs = 64
lr = 0.5
epochs= 1

###### Basic loop

In [661]:
for e in range(epochs):
    idx=0
    for i in range(math.ceil(x_train.shape[0]/bs)):
        xb = x_train[idx:idx+bs, :]
        yb = y_train[idx:idx+bs]
        idx += bs
        pred_ls = m(xb)
        loss = loss_func(pred_ls, yb)
        loss.backward()

        with torch.no_grad():
            for l in m.layers:
                if hasattr(l, 'weight'):
                    l.weight -= lr*l.weight.grad
                    l.bias -= lr*l.bias.grad
                    l.weight.grad.zero_()
                    l.bias.grad.zero_()
    #         m.zero_grad()

In [662]:
accuracy(m(x_train), y_train)

tensor(0.9858)

###### Registering modules

In [717]:
class model1(nn.Module):
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        self.l1 = nn.Linear(n_in, nh)
        self.l2 = nn.Linear(nh, n_out)
        self.relu = nn.ReLU()
        
    def __call__(self,x):
        return self.l2(self.relu(self.l1(x)))
#         return self.l2(F.relu(self.l1(x)))

In [718]:
m1 = model1(784, 50, 10)

In [719]:
m1

model1(
  (l1): Linear(in_features=784, out_features=50, bias=True)
  (l2): Linear(in_features=50, out_features=10, bias=True)
  (relu): ReLU()
)

In [721]:
print(list(m1.named_children()), '...')
print(list(m1.named_modules()))

[('l1', Linear(in_features=784, out_features=50, bias=True)), ('l2', Linear(in_features=50, out_features=10, bias=True)), ('relu', ReLU())] ...
[('', model1(
  (l1): Linear(in_features=784, out_features=50, bias=True)
  (l2): Linear(in_features=50, out_features=10, bias=True)
  (relu): ReLU()
)), ('l1', Linear(in_features=784, out_features=50, bias=True)), ('l2', Linear(in_features=50, out_features=10, bias=True)), ('relu', ReLU())]


In [704]:
m2 = model1(784, 50, 10)

In [705]:
m2

model1(
  (l1): Linear(in_features=784, out_features=50, bias=True)
  (l2): Linear(in_features=50, out_features=10, bias=True)
)

In [716]:
print(list(m2.named_children()), '...')
print(list(m2.named_modules()))

[('l1', Linear(in_features=784, out_features=50, bias=True)), ('l2', Linear(in_features=50, out_features=10, bias=True))] ...
[('', model1(
  (l1): Linear(in_features=784, out_features=50, bias=True)
  (l2): Linear(in_features=50, out_features=10, bias=True)
)), ('l1', Linear(in_features=784, out_features=50, bias=True)), ('l2', Linear(in_features=50, out_features=10, bias=True))]


###### Testing __setattr__

In [740]:
class test():
    def __init__(self, x1, x2):
        self.x1, self.x2 = x1, x2
    
    def __setattr__(self, k, v):
        print(f'seattr invoked : {k} : {v}')
        super().__setattr__(k,v)
#         pass

In [741]:
t = test('one', 'two')

seattr invoked : x1 : one
seattr invoked : x2 : two


In [742]:
t.x1

'one'

In [785]:
class dummymodel():
    def __init__(self, n_in, nh, n_out):
        self._modules = {}
        self.l1 = nn.Linear(n_in, nh)
        self.l2 = nn.Linear(nh, n_out)
        
    def __setattr__(self, k, v):
        if not k.startswith("_"): self._modules[k]=v
        super().__setattr__(k, v)
        
    def __repr__(self):
        return (f'{self._modules}')
        
    def __call__(self,x):
        return self.l2(F.relu(self.l1(x)))
    
    def named_parameters(self):
        for k, l in zip(self._modules.keys(), self._modules.values()):
            for p in l.parameters():
                yield k, p

In [789]:
dm = dummymodel(3, 4, 2)

In [790]:
dm

{'l1': Linear(in_features=3, out_features=4, bias=True), 'l2': Linear(in_features=4, out_features=2, bias=True)}

In [791]:
list(dm.named_parameters())

[('l1', Parameter containing:
  tensor([[ 0.2127, -0.2791, -0.0977],
          [ 0.2645, -0.4434, -0.1080],
          [-0.5036,  0.5682, -0.5582],
          [ 0.0379, -0.4723,  0.0550]], requires_grad=True)),
 ('l1', Parameter containing:
  tensor([-0.4856, -0.4348, -0.1240,  0.2457], requires_grad=True)),
 ('l2', Parameter containing:
  tensor([[-0.3092,  0.2318,  0.0410, -0.2967],
          [ 0.3150, -0.3290, -0.4729,  0.0037]], requires_grad=True)),
 ('l2', Parameter containing:
  tensor([0.2672, 0.0959], requires_grad=True))]

In [697]:
list(m.named_modules()), list(m.named_children()), list(m.named_parameters())

([('', Model())], [], [])

###### Registering modules

In [806]:
class Model(nn.Module):
    def __init__(self, ni, nh, no):
        super().__init__()
        self.layers = [nn.Linear(ni, nh), nn.ReLU(), nn.Linear(nh, no)]
        for c, i in enumerate(self.layers): self.add_module(f'layer_{c}', i)
    
    def __call__(self, x):
        for i in self.layers:
            x = i(x)
        return x

In [807]:
m3 = Model(784, 50, 10)

In [808]:
list(m3.named_modules()), list(m3.named_children())#, list(m3.named_parameters())

([('', Model(
     (layer_0): Linear(in_features=784, out_features=50, bias=True)
     (layer_1): ReLU()
     (layer_2): Linear(in_features=50, out_features=10, bias=True)
   )),
  ('layer_0', Linear(in_features=784, out_features=50, bias=True)),
  ('layer_1', ReLU()),
  ('layer_2', Linear(in_features=50, out_features=10, bias=True))],
 [('layer_0', Linear(in_features=784, out_features=50, bias=True)),
  ('layer_1', ReLU()),
  ('layer_2', Linear(in_features=50, out_features=10, bias=True))])

###### Using optim to refactor step and grad - Here we refactored params as well but params shud be done as above

In [693]:
class optimizer():
    def __init__(self, lr, model):
        self.lr = lr
        self.model = model
    
    def step(self):
        with torch.no_grad():
            for l in self.model.layers:
                if hasattr(l, 'weight'):
                    l.weight -= self.lr*l.weight.grad
                    l.bias -= self.lr*l.bias.grad
    def zero_grad(self):
        with torch.no_grad():
            for l in self.model.layers:
                if hasattr(l, 'weight'):
                    l.weight.grad.zero_()
                    l.bias.grad.zero_()

In [694]:
cop = optimizer(0.5, m)

In [695]:
for e in range(epochs+5):
    idx=0
    for i in range(math.ceil(x_train.shape[0]/bs)):
        xb = x_train[idx:idx+bs, :]
        yb = y_train[idx:idx+bs]
        idx += bs
        pred_ls = m(xb)
        loss = loss_func(pred_ls, yb)
        loss.backward()
        
        cop.step()
        cop.zero_grad()

In [696]:
accuracy(m(x_train), y_train)

tensor(0.9737)

###### Exploring torch nn module

In [439]:
from fastai.vision import *

In [442]:
m=models.resnet18()

In [438]:
m = nn.Sequential(nn.Conv2d(2,3,3),
                  nn.ReLU(),
                  nn.Conv2d(3,4,3),
                  nn.ReLU())

In [458]:
list(m.named_children())[5][1]

Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (downsample): Sequential(
      (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): BasicBlock(
    (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(128, eps=1e-05, moment

In [461]:
list(m.named_modules())[1:]

[('conv1',
  Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)),
 ('bn1',
  BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
 ('relu', ReLU(inplace)),
 ('maxpool',
  MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)),
 ('layer1', Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv

In [466]:
m

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [468]:
for i,j in m.named_children():
    if isinstance(j, nn.BatchNorm2d):
        print(i, j)

bn1 BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


In [489]:
for i in list(list(m.named_children())[0][1].named_children()):
    print(i)

In [480]:
def layer_match(m, match):
    for i,j in m.named_children():
        print(i, j)
        if isinstance(j, match):
            print(i, j)
        layer_match(j, match)

In [481]:
layer_match(models.resnet18(), nn.Conv2d)

conv1 Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
conv1 Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
bn1 BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
relu ReLU(inplace)
maxpool MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
layer1 Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReL

###### DataLoader and Dataset

In [809]:
#export
class Dataset():
    def __init__(self, x, y): self.x,self.y = x,y
    def __len__(self): return len(self.x)
    def __getitem__(self, i): return self.x[i],self.y[i]

In [811]:
train_ds,valid_ds = Dataset(x_train, y_train),Dataset(x_valid, y_valid)

In [819]:
train_ds[0][0].shape, train_ds[0][1]

(torch.Size([784]), tensor(5))

In [821]:
train_ds[:5]

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]), tensor([5, 0, 4, 1, 9]))

In [872]:
# Dataloader
class Dataloader():
    def __init__(self, ds, bs):
        self.ds, self.bs = ds, bs
    def __iter__(self):
        for i in range(0, len(self.ds), self.bs): yield self.ds[i:i+self.bs]

In [873]:
train_dl = Dataloader(train_ds, bs=32)
valid_dl = Dataloader(valid_ds, bs=10)

In [892]:
x=iter(train_dl)

In [894]:
next(x)

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([6, 9, 0, 5, 6, 0, 7, 6, 1, 8, 7, 9, 3, 9, 8, 5, 9, 3, 3, 0, 7, 4, 9, 8,
         0, 9, 4, 1, 4, 4, 6, 0]))

In [1036]:
# sampler
class samp():
    def __init__(self, ds, bs):
        self.ds, self.bs = ds, bs
    def __iter__(self):
        self.idxs = torch.randperm(len(self.ds))
        print(self.idxs)
        for i in range(0, len(self.ds), self.bs):
            yield self.idxs[i:i+self.bs]

In [1042]:
# Testing indexing 
n=14
train_ds = Dataset(x_train[:n], y_train[:n])

In [1052]:
train_ds[:100][0].shape

torch.Size([14, 784])

In [1055]:
x=np.random.randn(3,4)
x

array([[ 1.8305  ,  1.238106,  0.883706,  1.364315],
       [ 0.973216,  1.233665, -0.009622, -0.318184],
       [ 0.46026 ,  0.027519,  0.051482,  0.311302]])

In [1062]:
x[:1000, :]

array([[ 1.8305  ,  1.238106,  0.883706,  1.364315],
       [ 0.973216,  1.233665, -0.009622, -0.318184],
       [ 0.46026 ,  0.027519,  0.051482,  0.311302]])

In [1043]:
t = samp(train_ds, 5)
t1 = iter(t)

In [1044]:
next(t1)

tensor([12,  5,  2,  6,  9,  8,  4,  7,  3,  1, 13,  0, 11, 10])


tensor([12,  5,  2,  6,  9])

In [1045]:
next(t1)

tensor([8, 4, 7, 3, 1])

In [1046]:
next(t1)

tensor([13,  0, 11, 10])

In [1047]:
next(t1)

StopIteration: 

In [1078]:
# sampler
class samp():
    def __init__(self, ds, bs):
        self.ds, self.bs = ds, bs
    def __iter__(self):
        self.idxs = torch.randperm(len(self.ds))
#         print(self.idxs)
        for i in range(0, len(self.ds), self.bs):
            yield self.idxs[i:i+self.bs]

In [1098]:
def collate(b):
    xs,ys = zip(*b)
    return torch.stack(xs),torch.stack(ys)

class Dataloader():
    def __init__(self, ds, bs, sampler, collate_fn=collate):
        self.ds, self.bs = ds, bs
        self.collate_fn = collate_fn
    def __iter__(self):
        samp = self.sampler(self.ds, self.bs)
        for i in samp: yield self.collate_fn([self.ds[i] for i in s])

In [1099]:
n=10
train_ds = Dataset(x_train[:n], y_train[:n])
dl = Dataloader(train_ds, 16, samp)
tl = iter(dl)

In [1100]:
next(tl)

AttributeError: 'Dataloader' object has no attribute 'sampler'

In [1091]:
next(tl)

StopIteration: 

In [1092]:
tl = iter(dl)
next(tl)

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([9, 1, 4, 1, 3, 1, 2, 0, 5, 4]))

In [1094]:
# Pytorch
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler