In [11]:
import torch
import torch.nn as nn
import torch
import numpy as np
from torch.optim.optimizer import Optimizer

device = torch.device('cuda')

def gradient(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=False):
    '''
    Compute the gradient of `outputs` with respect to `inputs`

    gradient(x.sum(), x)
    gradient((x * y).sum(), [x, y])
    '''
    if torch.is_tensor(inputs):
        inputs = [inputs]
    else:
        inputs = list(inputs)
    grads = torch.autograd.grad(outputs, inputs, grad_outputs,
                                allow_unused=True,
                                retain_graph=retain_graph,
                                create_graph=create_graph)
    grads = [x if x is not None else torch.zeros_like(y) for x, y in zip(grads, inputs)]
    return torch.cat([x.contiguous().view(-1) for x in grads])


def jacobian(outputs, inputs, create_graph=False,retain_graph=True):
    '''
    Compute the Jacobian of `outputs` with respect to `inputs`

    jacobian(x, x)
    jacobian(x * y, [x, y])
    jacobian([x * y, x.sqrt()], [x, y])
    '''
    if torch.is_tensor(outputs):
        outputs = [outputs]
    else:
        outputs = list(outputs)

    if torch.is_tensor(inputs):
        inputs = [inputs]
    else:
        inputs = list(inputs)

    jac = []
    for output in outputs:
        output_flat = output.view(-1)
        output_grad = torch.zeros_like(output_flat)
        for i in range(len(output_flat)):
            output_grad[i] = 1
            jac += [gradient(output_flat, inputs, output_grad, retain_graph, create_graph)]
            output_grad[i] = 0
    return torch.stack(jac)



In [12]:
inputs = torch.randn(10, requires_grad=True)
net = nn.Linear(10, 3)
outputs = net(inputs)
outputs_flat = outputs.view(-1)
outputs_grad = torch.zeros_like(outputs_flat)
jac = []
for i in range(len(outputs_flat)):
    outputs_grad[i] = 1
    jac += [gradient(outputs_flat, inputs, outputs_grad, retain_graph=True, create_graph=False)]
    outputs_grad[i] = 0

torch.stack(jac).shape

torch.Size([3, 10])

### Customized Levenberg-Marquardt Optimizer

In [56]:
class LM(Optimizer):
    '''
    Arguments:
        lr: learning rate (step size) default:1
        alpha: the hyperparameter in the regularization default:0.2
    '''
    def __init__(self, params, lr=1, alpha=0.2):
        defaults = dict(
            lr = lr,
            alpha = alpha
        )
        super(LM, self).__init__(params, defaults)

        if len(self.param_groups) != 1:
            raise ValueError ("LM doesn't support per-parameter options")    
    
    def step(self, closure):
        '''
        performs a single step
        in the closure: we evaluate the diff

        '''
        assert len(self.param_groups) == 1
        group = self.param_groups[0]
        lr = group['lr']
        alpha = group['alpha']
        params = group['params']
        diff = closure().float()
        # calculate Jacobian
        J = jacobian(diff, params, create_graph=True, retain_graph=True)
        prev_loss = torch.mean(diff.detach() ** 2)
        print (prev_loss.item())
        # approximate Hessian
        H = torch.matmul(J.T, J) + torch.eye(J.shape[-1]).to(device) * alpha
        # calculate the update       
        delta_w = -1 * torch.matmul(torch.inverse(H), torch.matmul(J.T, diff)).detach()
        offset = 0
        for p in group['params']:
            numel = p.numel()
            p = p + lr * delta_w[offset:offset + numel].view_as(p)
            offset += numel
        diff = closure().float()
        loss = torch.mean(diff.detach() ** 2)
        print (loss.item())
        if loss < prev_loss:
            print ('successful iteration')
            if alpha > 1e-5:
                group['alpha'] /= 10
        else:
            print ('failed iteration')
            if alpha < 1e5:
                group['alpha'] *= 10
            # undo the step
            offset = 0
            for p in group['params']:    
                numel = p.numel()
                p = p - lr * delta_w[offset:offset + numel].view_as(p)
                offset += numel
        
        

In [57]:
net = torch.nn.Sequential(
        torch.nn.Linear(1, 50),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(50, 20),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(20, 1),
    )
net.cuda('cuda:0')

optimizer = LM(net.parameters())
BATCH_SIZE = 64
EPOCH = 200
x = torch.unsqueeze(torch.linspace(-10, 10, 1000), dim=1)  
y = torch.sin(x) + 0.2*torch.rand(x.size())
data, target = x.to('cuda:0'), y.to('cuda:0')


In [58]:
n_iter=100

for iter in range(n_iter):
    print ('iter:{}'.format(iter))
    def closure():
        optimizer.zero_grad()
        out = net(data)
        diff = (out - target).squeeze()
        return diff
    optimizer.step(closure)


iter:0
0.8467307686805725
0.8467307686805725
failed iteration
iter:1
0.8467307686805725
0.8467307686805725
failed iteration
iter:2
0.8467307686805725
0.8467307686805725
failed iteration
iter:3
0.8467307686805725
0.8467307686805725
failed iteration
iter:4
0.8467307686805725
0.8467307686805725
failed iteration
iter:5
0.8467307686805725
0.8467307686805725
failed iteration
iter:6
0.8467307686805725


KeyboardInterrupt: 

In [1]:
def getClosestFactor(target, number):
    for  i in range(number):
        if (number % (target + i) == 0):
            return target + i
        elif (number % (target - i) == 0):
            return target - i
    return number

In [3]:
getClosestFactor(10,20061)

9

In [4]:
20061/9

2229.0

In [7]:
import torch
from torch.autograd import Variable

x = torch.unsqueeze(torch.linspace(-10, 10, 1000), dim=1)  # x data (tensor), shape=(100, 1)
y = torch.sin(x) + 0.2*torch.rand(x.size())                 # noisy y data (tensor), shape=(100, 1)
x, y = Variable(x), Variable(y)

In [8]:
x.shape

torch.Size([1000, 1])

In [9]:
tnestedtensor([
 tensor([8, 1, 3, 4]),
 tensor([5, 0, 9])
]) 

NameError: name 'tnestedtensor' is not defined