In [1]:
# Reload imported modules automatically before each execution so that edits
# made to them on disk are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from exp.nb_01 import *

def get_data():
    """Fetch the gzipped MNIST pickle and return its train/valid arrays as tensors.

    Returns:
        A lazy ``map`` object yielding ``x_train, y_train, x_valid, y_valid``
        (in that order), each passed through ``tensor``. The third split
        stored in the pickle is discarded.
    """
    archive_path = datasets.download_data(MNIST_URL, ext='.gz')
    with gzip.open(archive_path, 'rb') as archive:
        # NOTE: pickle.load can execute arbitrary code; only use trusted archives.
        train_split, valid_split, _ = pickle.load(archive, encoding='latin-1')
    x_train, y_train = train_split
    x_valid, y_valid = valid_split
    return map(tensor, (x_train, y_train, x_valid, y_valid))

def normalize(x, mean, std):
    """Standardise ``x``: subtract ``mean``, then divide by ``std``."""
    centered = x - mean
    return centered / std

In [3]:
# Unpack the four tensors yielded by get_data(): training inputs/labels and
# validation inputs/labels.
x_train,y_train,x_valid,y_valid = get_data()

In [4]:
# Mean and standard deviation of the training inputs before normalisation.
x_train.mean(),x_train.std()

(tensor(0.1304), tensor(0.3073))

In [5]:
# Keep the training-set statistics so that both splits can be normalised with
# the same values.
train_mean,train_std = x_train.mean(),x_train.std()

In [6]:
# The validation set is normalised with the *training* statistics as well.
# NOTE: this rebinds x_train / x_valid, so re-running the cell normalises the
# already-normalised tensors a second time.
x_train = normalize(x_train, train_mean, train_std)
x_valid = normalize(x_valid, train_mean, train_std)

In [7]:
# Sanity check: after normalisation the training inputs should have a mean
# close to 0 and a standard deviation close to 1.
x_train.mean(), x_train.std()

(tensor(0.0001), tensor(1.))

In [8]:
# Let's take a simple example: a fully connected network, i.e. an affine transformation
# followed by a non-linear operation (ReLU or PReLU). The equation is y = relu(wx + b).

In [9]:
#export

from torch import nn
class LinearModel():
    """Affine layer ``out = inp @ wt + b`` with a hand-written backward pass.

    Gradients are stored on the tensors' ``.grad`` attributes by ``backward``
    instead of being produced by autograd.
    """

    def __init__(self, num_in, num_out):
        # Input/output of the latest forward pass, cached for backward().
        self.inp, self.out = None, None
        # Random weights scaled by 1/sqrt(num_in); the bias starts at zero.
        scale = math.sqrt(num_in)
        self.wt = torch.randn([num_in, num_out]) / scale
        self.b = torch.zeros([num_out])

    def __call__(self, inp):
        """Apply the layer to ``inp``, caching both the input and the result."""
        self.inp = inp
        result = inp @ self.wt + self.b
        self.out = result
        return result

    def backward(self, ):
        """Push ``self.out.grad`` back onto the weights, the input and the bias."""
        grad_out = self.out.grad
        self.wt.grad = self.inp.t() @ grad_out
        self.inp.grad = grad_out @ self.wt.t()
        # The bias is broadcast over the batch, so its gradient sums over rows.
        self.b.grad = grad_out.sum(0)

class Relu():
    """ReLU activation with a hand-written backward pass."""

    def __init__(self,):
        # Input/output of the latest forward pass, cached for backward().
        self.inp = None
        self.out = None

    def __call__(self, inp):
        """Return ``max(inp, 0)`` element-wise without modifying ``inp``."""
        self.inp = inp
        # Out-of-place clamp: the in-place variant overwrote the caller's
        # tensor (and therefore the previous layer's cached output).
        self.out = inp.clamp(min=0)
        return self.out

    def backward(self, ):
        """Pass ``self.out.grad`` through wherever the input was positive."""
        self.inp.grad = (self.inp > 0).float() * self.out.grad
        
class Mse():
    """Mean squared error loss with a hand-written backward pass."""

    def __init__(self, ):
        self.inp = None  # predictions from the latest forward pass
        self.out = None  # targets from the latest forward pass

    def __call__(self, output, target):
        """Return ``mean((output.squeeze() - target) ** 2)`` and cache both arguments."""
        self.inp = output
        self.out = target
        return((output.squeeze() - target).pow(2)).mean()

    def backward(self,):
        """Store d(loss)/d(output) on ``self.inp.grad``."""
        diff = self.inp.squeeze() - self.out
        # d/dx mean((x - t)^2) = 2 * (x - t) / n; the factor 2 was missing,
        # which silently halved every gradient.
        self.inp.grad = (2. * diff / self.inp.shape[0]).unsqueeze(-1)

class Sequential():
    """Stack of ``LinearModel``/``Relu`` layers trained with manual backprop and SGD.

    Args:
        lr: learning rate used by ``update``.
        layers: layer sizes. Each consecutive pair becomes a ``LinearModel``
            followed by a ``Relu``; a final ``LinearModel`` maps ``layers[-1]``
            to a single output. Must contain at least one size.
        loss_fn: loss object providing ``__call__(output, target)`` and
            ``backward()``; ``Mse()`` is used when none is given.
    """

    def __init__(self, lr=0.01, layers=[],  loss_fn=None,):
        # ``layers`` is only read, never mutated, so the shared default is harmless.
        self.models = []
        self.loss_fn = loss_fn or Mse()
        self.lr = lr
        for num_in, num_out in zip(layers, layers[1:]):
            self.models.append(LinearModel(num_in, num_out))
            self.models.append(Relu())
        self.models.append(LinearModel(layers[-1], 1))

    def __call__(self, inp, targ):
        """Run ``inp`` through every layer and return the loss against ``targ``."""
        x = inp
        for model in self.models:
            x = model(x)
        return self.loss_fn(x, targ)

    def backward(self, ):
        """Back-propagate from the loss through the layers in reverse order."""
        self.loss_fn.backward()
        for model in reversed(self.models):
            model.backward()

    def update(self, ):
        """Take one SGD step on every layer that has parameters."""
        for model in self.models:
            if not hasattr(model, "wt"):
                continue  # activation layers have nothing to update
            model.wt = model.wt - (self.lr * model.wt.grad)
            # The bias gradient is computed in backward() but was never
            # applied, so biases stayed at their initial value.
            if hasattr(model, "b"):
                model.b = model.b - (self.lr * model.b.grad)

In [124]:
class PyMod(nn.Module):
    """Multi-layer perceptron: ``nn.Linear`` layers with ``nn.ReLU`` in between.

    Args:
        num_hidden: layer sizes, input first and output last, e.g.
            ``[784, 50, 10]``. A ReLU follows every linear layer except the
            last one.
    """

    def __init__(self, num_hidden=[]):
        super().__init__()
        layers = []
        last_linear = len(num_hidden) - 2
        for i in range(len(num_hidden) - 1):
            layers.append(nn.Linear(num_hidden[i], num_hidden[i + 1]))
            if i < last_linear:
                layers.append(nn.ReLU())
        # ModuleList registers the layers so that parameters() can find them.
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        """Apply the layers in order and return the final activations."""
        # Defined as ``forward`` rather than ``__call__`` so that
        # ``nn.Module.__call__`` still runs registered hooks around it.
        for l in self.layers:
            x = l(x)
        return x
            

In [59]:
def log_softmax(x):return torch.log(x.exp() / (x.exp().sum(dim=-1,keepdim=True)))
def nll(input, target):
    """Negative log-likelihood: minus the mean of ``input[i, target[i]]`` over rows."""
    rows = range(target.shape[0])
    picked = input[rows, target]
    return -picked.mean()

In [50]:
# Shape check on a single-row batch: x[None,] adds a leading batch dimension.
# NOTE(review): `softmax` is not defined in any cell shown here — presumably it
# comes from the `exp.nb_01` star import or a since-deleted cell; confirm.
x = torch.tensor([1,2,3]).float()
print(softmax(x[None,]).shape)

torch.Size([1, 3])


In [11]:
# Look up PyTorch's built-in ReLU; the cell output displays its signature.
torch.functional.F.relu

<function torch.nn.functional.relu(input, inplace=False)>

In [60]:
if __name__ == "__main__":
    # Train the hand-written Sequential model as a regression on the digit
    # labels (its default loss is Mse).
    lr = 0.01
    epoch = 1
    # Mse needs float targets, but nll() in the later training cells indexes
    # with the integer labels. Use a float copy here instead of rebinding the
    # global y_train / y_valid, so those cells still work on a top-to-bottom run.
    y_train_float = y_train.float()
    seq = Sequential(lr=lr, layers=[784, 50])
    for i in range(epoch):
        loss = seq(x_train, y_train_float)
        if i % 10 == 0:
            print("loss ", loss)

        seq.backward()
        seq.update()

loss  tensor(32.0264)


In [136]:
def fit(model, bs, num_epochs):
    """Train ``model`` with mini-batch SGD, updating each layer's weight and bias by hand.

    Reads the notebook globals ``x_train``, ``y_train``, ``n`` (number of
    training rows) and ``lr`` (learning rate).

    Args:
        model: callable module that exposes its layers as the iterable
            ``model.layers``.
        bs: mini-batch size.
        num_epochs: number of passes over the training set.
    """
    for i in range(num_epochs):
        for j in range((n-1)//bs + 1):
            start_j = j*bs
            end_j = start_j+bs
            xb = x_train[start_j:end_j]
            yb = y_train[start_j:end_j]
            pred = model(xb)
            loss = nll(log_softmax(pred), yb)
            loss.backward()
            with torch.no_grad():
                # ``model.layers`` is a container to iterate over; calling it
                # (``model.layers()``) raises because it has no forward().
                for layer in model.layers:
                    if hasattr(layer, 'weight'):
                        layer.weight -= layer.weight.grad * lr
                        layer.bias   -= layer.bias.grad   * lr
                        layer.weight.grad.zero_()
                        layer.bias.grad.zero_()
        if i % 10 == 0:
            # Loss of the last mini-batch of this epoch.
            print("loss ", loss)

In [None]:
def fit(model, bs, num_epochs):
    """Train ``model`` with mini-batch SGD, stepping every parameter by hand.

    Reads the notebook globals ``x_train``, ``y_train``, ``n`` (number of
    training rows) and ``lr`` (learning rate).

    Args:
        model: callable module providing ``parameters()``.
        bs: mini-batch size.
        num_epochs: number of passes over the training set.
    """
    for i in range(num_epochs):
        for j in range((n-1)//bs + 1):
            start_j = j*bs
            end_j = start_j+bs
            xb = x_train[start_j:end_j]
            yb = y_train[start_j:end_j]
            pred = model(xb)
            # log_softmax is evaluated once; the extra unused call was dropped.
            loss = nll(log_softmax(pred), yb)
            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                    # Gradients accumulate across backward() calls, so reset them.
                    p.grad.zero_()
        if i % 10 == 0:
            # Loss of the last mini-batch of this epoch.
            print("loss ", loss)

In [149]:
from torch import optim

In [151]:
def fit(model, lr, bs, num_epochs):
    """Train ``model`` with mini-batch SGD using ``optim.SGD``.

    Reads the notebook globals ``x_train``, ``y_train`` and ``n`` (number of
    training rows).

    Args:
        model: callable module providing ``parameters()``.
        lr: learning rate passed to the optimiser.
        bs: mini-batch size.
        num_epochs: number of passes over the training set.
    """
    opt = optim.SGD(model.parameters(), lr)
    for i in range(num_epochs):
        for j in range((n-1)//bs + 1):
            start_j = j*bs
            end_j = start_j+bs
            xb = x_train[start_j:end_j]
            yb = y_train[start_j:end_j]
            pred = model(xb)
            # log_softmax is evaluated once; the extra unused call was dropped.
            loss = nll(log_softmax(pred), yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
        if i % 10 == 0:
            # Loss of the last mini-batch of this epoch.
            print("loss ", loss)

In [115]:
# List the model's direct child modules by name.
# NOTE: `pyMod` is only assigned in a later cell (`pyMod = PyMod(...)`), so this
# cell fails on a fresh top-to-bottom run.
for name,l in pyMod.named_children(): 
    print(f"{name}: {l}")

layers: ModuleList(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=10, bias=True)
)


In [138]:
# Train the hand-rolled PyMod MLP on MNIST.
pyMod = PyMod(num_hidden=[784, 50, 10])
n = x_train.shape[0]
lr = 0.01
bs = 64
num_epochs = 11
# The most recent `fit` definition above takes (model, lr, bs, num_epochs);
# the three-argument call raised a TypeError on a top-to-bottom run.
fit(pyMod, lr, bs, num_epochs)

loss  tensor(0.7642, grad_fn=<NegBackward>)
loss  tensor(0.0898, grad_fn=<NegBackward>)


In [152]:
# Same 784 -> 50 -> 10 architecture built from PyTorch's own nn.Sequential,
# trained with the optimiser-based fit().
model = nn.Sequential(nn.Linear(784,50), nn.ReLU(), nn.Linear(50,10))
n = x_train.shape[0]
lr = 0.01
bs = 64
num_epochs = 11
fit(model, lr, bs, num_epochs)

loss  tensor(0.7746, grad_fn=<NegBackward>)
loss  tensor(0.1493, grad_fn=<NegBackward>)


In [14]:
# Run the notebook2script.py helper on this notebook (presumably exporting the
# cells marked `#export`; the recorded output reports the module nb_yadl.py).
!./notebook2script.py yadl.ipynb

Converted yadl.ipynb to nb_yadl.py
