In [3]:
#export
import import_ipynb
import notebook2script_py

importing Jupyter notebook from notebook2script_py.ipynb


In [4]:
notebook2script_py.notebook2script('_02_fully_connected.ipynb')

Converted _02_fully_connected.ipynb to nb_02.py


In [5]:
#export
from exp.nb_02 import *
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib as mpl

In [6]:
#%load_ext autoreload
#%autoreload 2

#%matplotlib inline

In [7]:
mpl.rcParams['image.cmap'] = 'gray'

In [8]:
x_train , y_train , x_valid , y_valid = get_data()

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [9]:
n , m = x_train.shape
c = y_train.max() + 1
nh = 50

In [10]:
class Model(nn.Module):
    def __init__(self , n_in , nh , n_out):
        super().__init__()
        self.layers = [nn.Linear(n_in , nh) , nn.ReLU() , nn.Linear(nh , n_out)]

    def __call__(self , x):
        for l in self.layers:
            x = l(x)
        return x

In [11]:
model = Model(m, nh, 10)

In [12]:
pred = model(x_train)

**Cross entropy loss**

In [13]:
def log_softmax(x): return (x.exp() / (x.exp().sum(-1 , keepdim = True))).log()

In [14]:
sm_pred = log_softmax(pred)

In [15]:
y_train[:3]

tensor([5., 0., 4.])

In [16]:
sm_pred[[0,1,2] , [5,0,4]]

tensor([-2.2499, -2.3755, -2.1417], grad_fn=<IndexBackward>)

In [17]:
y_train.shape[0]

50000

In [18]:
y_train = y_train.type(torch.LongTensor)
y_valid = y_valid.type(torch.LongTensor)

In [19]:
def nll(input, target): return -input[torch.arange(input.size(0)) , target.type(torch.LongTensor)].mean()

In [20]:
loss = nll(sm_pred , y_train)

In [21]:
loss

tensor(2.3094, grad_fn=<NegBackward>)

In [22]:
def log_softmax(x): return x - x.exp().sum(-1 , keepdim = True).log()

In [23]:
def logsumexp(x):
    m = x.max(-1)[0]
    return m + (x - m[ : , None]).exp().sum(-1).log()

In [24]:
def log_softmax(x): return x - x.logsumexp(-1 , keepdim = True)

**Basic training loop**

In [25]:
loss_func = F.cross_entropy

In [26]:
#export
def accuracy(out , yb): return (torch.argmax(out , dim = 1) == yb).float().mean()

In [27]:
bs = 64                  # batch size

xb = x_train[0 : bs]     # a mini-batch from x
preds = model(xb)      # predictions
preds[0] , preds.shape

(tensor([-0.0633,  0.0672,  0.1929, -0.1295,  0.1997,  0.0622, -0.1033, -0.1364,
          0.0279, -0.0958], grad_fn=<SelectBackward>),
 torch.Size([64, 10]))

In [28]:
yb = y_train[0 : bs]
loss_func(preds , yb)

tensor(2.3190, grad_fn=<NllLossBackward>)

In [29]:
accuracy(preds, yb)

tensor(0.0938)

In [30]:
lr = 0.5   # learning rate
epochs = 1 # how many epochs to train for

In [31]:
for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i : end_i]
        yb = y_train[start_i : end_i]
        loss = loss_func(model(xb), yb)

        loss.backward()
    with torch.no_grad():
        for l in model.layers:
            if hasattr(l , 'weight'):
                l.weight -= l.weight.grad * lr
                l.bias   -= l.bias.grad * lr
                l.weight.grad.zero_()
                l.bias.grad.zero_()

In [32]:
loss_func(model(xb) , yb) , accuracy(model(xb) , yb)

(tensor(1153.0731, grad_fn=<NllLossBackward>), tensor(0.1875))

**Using parameters and optim**

**Parameters**

In [33]:
class Model(nn.Module):
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        self.l1 = nn.Linear(n_in,nh)
        self.l2 = nn.Linear(nh,n_out)
        
    def __call__(self, x): return self.l2(F.relu(self.l1(x)))

In [34]:
model = Model(m , nh , 10)

In [35]:
for name,l in model.named_children(): print(f"{name}: {l}")

l1: Linear(in_features=784, out_features=50, bias=True)
l2: Linear(in_features=50, out_features=10, bias=True)


In [36]:
model

Model(
  (l1): Linear(in_features=784, out_features=50, bias=True)
  (l2): Linear(in_features=50, out_features=10, bias=True)
)

In [37]:
def fit():
    for epoch in range(epochs):
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i : end_i]
            yb = y_train[start_i : end_i]
            loss = loss_func(model(xb), yb)

            loss.backward()
        with torch.no_grad():
            for p in model.parameters():
                p -= p.grad * lr
                model.zero_grad()

In [38]:
fit()
loss_func(model(xb) , yb) , accuracy(model(xb) , yb)

(tensor(29.7079, grad_fn=<NllLossBackward>), tensor(0.2500))

**Registering modules**

In [39]:
layers = [nn.Linear(m , nh) , nn.ReLU() , nn.Linear(nh , 10)]

In [40]:
class Model(nn.Module):
    def __init__(self , layers):
        super().__init__()
        self.layers = layers
        for i , l in enumerate(self.layers):
            self.add_module(f'layer_{i}' , l)

    def __call__(self , x):
        for l in self.layers:
            x = l(x)
            return x

In [41]:
model = Model(layers)

In [42]:
model

Model(
  (layer_0): Linear(in_features=784, out_features=50, bias=True)
  (layer_1): ReLU()
  (layer_2): Linear(in_features=50, out_features=10, bias=True)
)

**nn.ModuleList**

In [43]:
class SequentialModel(nn.Module):
    def __init__(self , layers):
        super().__init__()
        self.layers = nn.ModuleList(layers)

    def __call__(self , x):
        for l in self.layers:
            x = l(x)
        return x

In [44]:
model = SequentialModel(layers)

In [45]:
model

SequentialModel(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=10, bias=True)
  )
)

In [46]:
fit()
loss_func(model(xb) , yb) , accuracy(model(xb) , yb)

(tensor(14.2915, grad_fn=<NllLossBackward>), tensor(0.3750))

**nn.Sequential**

In [47]:
model = nn.Sequential(nn.Linear(m , nh) , nn.ReLU() , nn.Linear(nh , 10))

In [48]:
fit()
loss_func(model(xb) , yb) , accuracy(model(xb) , yb)

(tensor(37.6614, grad_fn=<NllLossBackward>), tensor(0.3125))

**optim**

In [49]:
class Optimizer():
    def __init__(self , params , lr = 0.5):
        self.params , self.lr = list(params) , lr

    def step(self):
        with torch.no_grad():
            for p in self.params:
                p -= p.grad * self.lr

    def zero_grad(self):
        for p in self.params:
            p.grad.data.zero_()

In [50]:
model = nn.Sequential(nn.Linear(m , nh) , nn.ReLU() , nn.Linear(nh , 10))

In [51]:
opt = Optimizer(model.parameters())

In [52]:
for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

In [53]:
loss , acc = loss_func(model(xb) , yb) , accuracy(model(xb) , yb)
loss , acc

(tensor(0.1311, grad_fn=<NllLossBackward>), tensor(0.9375))

In [54]:
#export
from torch import optim

In [55]:
def get_model():
    model = nn.Sequential(nn.Linear(m , nh) , nn.ReLU() , nn.Linear(nh , 10))
    return model , optim.SGD(model.parameters() , lr = lr)

In [56]:
model , opt = get_model()
loss_func(model(xb) , yb)

tensor(2.3368, grad_fn=<NllLossBackward>)

In [57]:
for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i+bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred , yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

In [58]:
loss , acc = loss_func(model(xb) , yb) , accuracy(model(xb) , yb)
loss , acc

(tensor(0.0805, grad_fn=<NllLossBackward>), tensor(0.9375))

**Dataset and DataLoader**

**Dataset**

In [59]:
#export
class Dataset():
    def __init__(self , x , y):
        self.x , self.y = x , y

    def __len__(self):
        return len(self.x)

    def __getitem__(self , i):
        return self.x[i] , self.y[i]

In [60]:
train_ds , valid_ds = Dataset(x_train , y_train) , Dataset(x_valid , y_valid)
assert len(train_ds) == len(x_train)
assert len(valid_ds) == len(x_valid)

In [61]:
xb,yb = train_ds[0 : 5]
assert xb.shape == (5 , 28 * 28)
assert yb.shape == (5 , )
xb , yb

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([5, 0, 4, 1, 9]))

In [62]:
model , opt = get_model()

In [63]:
for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        xb , yb = train_ds[i * bs : i * bs + bs]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

In [64]:
loss,acc = loss_func(model(xb) , yb) , accuracy(model(xb) , yb)
assert acc > 0.7
loss , acc

(tensor(0.2612, grad_fn=<NllLossBackward>), tensor(0.9375))

**DataLoader**

In [65]:
class DataLoader():
    def __init__(self , ds , bs):
        self.ds , self.bs = ds , bs

    def __iter__(self):
        for i in range(0 , len(self.ds) , self.bs):
            yield self.ds[i : i + self.bs]

In [66]:
train_dl = DataLoader(train_ds , bs)
valid_dl = DataLoader(valid_ds , bs)

In [67]:
xb , yb = next(iter(valid_dl))
assert xb.shape == (bs , 28 * 28)
assert yb.shape == (bs ,)

In [68]:
model , opt = get_model()

In [69]:
def fit():
    for epoch in range(epochs):
        for xb , yb in train_dl:
            pred = model(xb)
            loss = loss_func(pred, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()

In [70]:
fit()

In [71]:
loss , acc = loss_func(model(xb) , yb) , accuracy(model(xb) , yb)
assert acc > 0.7
loss , acc

(tensor(0.1116, grad_fn=<NllLossBackward>), tensor(0.9688))

**Random sampling**

In [72]:
class Sampler():
    def __init__(self , ds , bs , shuffle = False):
        self.n , self.bs , self.shuffle = len(ds) , bs , shuffle

    def __iter__(self):
        self.idxs = torch.randperm(self.n) if self.shuffle else torch.arange(self.n)
        for i in range(0, self.n, self.bs): yield self.idxs[i:i+self.bs]

In [73]:
small_ds = Dataset(*train_ds[:10])

In [74]:
s = Sampler(small_ds,3,False)
[o for o in s]

[tensor([0, 1, 2]), tensor([3, 4, 5]), tensor([6, 7, 8]), tensor([9])]

In [75]:
s = Sampler(small_ds,3,True)
[o for o in s]

[tensor([5, 4, 9]), tensor([3, 6, 7]), tensor([8, 0, 2]), tensor([1])]

In [76]:
def collate(b):
    xs , ys = zip(*b)
    return torch.stack(xs) , torch.stack(ys)

class DataLoader():
    def __init__(self , ds , sampler , collate_fn = collate):
        self.ds , self.sampler , self.collate_fn = ds , sampler , collate_fn

    def __iter__(self):
        for s in self.sampler:
            yield self.collate_fn([self.ds[i] for i in s])

In [77]:
train_samp = Sampler(train_ds , bs , shuffle = True)
valid_samp = Sampler(valid_ds , bs , shuffle = False)

In [78]:
train_dl = DataLoader(train_ds , sampler = train_samp , collate_fn = collate)
valid_dl = DataLoader(valid_ds , sampler = valid_samp , collate_fn = collate)

In [79]:
model , opt = get_model()
fit()

loss ,acc = loss_func(model(xb) , yb) , accuracy(model(xb) , yb)
assert acc > 0.7
loss ,acc

(tensor(0.1227, grad_fn=<NllLossBackward>), tensor(0.9531))

In [80]:
#export
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler

In [81]:
#export
def get_dls(train_ds, valid_ds, bs, **kwargs):
    return (DataLoader(train_ds, batch_size=bs, shuffle=True, **kwargs),
            DataLoader(valid_ds, batch_size=bs*2, **kwargs))

# Reference for this notebook

In [1]:
#https://github.com/fastai/course-v3/blob/master/nbs/dl2/03_minibatch_training.ipynb