In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
import copy
from dbclass import TrainDB

# Training hyper-parameters.
n_epochs = 3
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
# Log about ten times per epoch (60000 MNIST training images). Floor division
# keeps this an int: with true division the value is 93.75, and
# `batch_idx % 93.75 == 0` only holds at multiples of 375, so the training
# loops would log far less often than ten times per epoch.
log_interval = 60000 // (10 * batch_size_train)

# Seed the RNG and switch cuDNN off (presumably for run-to-run reproducibility).
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

# Both splits share the same storage location and preprocessing pipeline.
mnist_root = 'drive/My Drive/mnist/MNIST_data/'
mnist_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])

# shuffle=False: batches are visited in dataset order, so a given batch index
# refers to the same samples on every pass.
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(mnist_root, train=True, download=True,
                               transform=mnist_transform),
    batch_size=batch_size_train, shuffle=False)

test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(mnist_root, train=False, download=True,
                               transform=mnist_transform),
    batch_size=batch_size_test, shuffle=True)

# Pull a single test batch up front.
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)

class Net(nn.Module):
    """Small two-convolution CNN producing log-probabilities over 10 classes.

    The flatten step in ``forward`` hard-codes 320 features per sample
    (20 channels x 4 x 4), which is what 1x28x28 inputs yield after the two
    conv(5x5) + max-pool(2) stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities.

        Args:
            x: input batch accepted by ``conv1`` (one input channel).

        Returns:
            Tensor with one row per sample and 10 columns; each row is a
            log-softmax over the class scores.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        # Functional dropout must be told the mode explicitly; the module-based
        # conv2_drop follows self.training on its own.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Explicit class dimension: the implicit-dim form is deprecated and
        # emits a warning. For this 2-D tensor the implicit choice was dim 1
        # as well, so the result is unchanged.
        return F.log_softmax(x, dim=1)


# Model under study and the SGD optimizer that updates it.
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)

# Three independent deep copies of the freshly initialised model, so they start
# from the same weights as `network`.
net1, net2, net3 = (copy.deepcopy(network) for _ in range(3))

# Bookkeeping lists appended to by train() / test().
train_losses, train_counter, test_losses = [], [], []
# Cumulative number of training samples at each epoch boundary (0 .. n_epochs).
test_counter = [
    epoch_no * len(train_loader.dataset) for epoch_no in range(n_epochs + 1)
]



In [2]:
# TrainDB instance that the training loops feed on every batch via db.step().
# It is given the model, the training loader and the loss function.
# NOTE(review): the meaning of batchfreq is defined in dbclass — confirm.
db = TrainDB(network, train_loader, F.nll_loss, batchfreq=1)

In [None]:
def train(epoch):
  """Train ``network`` for one pass over ``train_loader``, reporting each step to ``db``.

  For every batch, the pre-update weights, the flattened per-parameter
  gradients and the scalar loss are handed to ``db.step``. Whenever
  ``batch_idx % log_interval == 0`` the loss is printed and appended to
  ``train_losses`` / ``train_counter``.

  Args:
    epoch: 1-based epoch number; used in the log line, in the
      ``train_counter`` offset and passed through to ``db.step``.
  """
  network.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    if batch_idx % 50 == 0:
      print(batch_idx)
    # Snapshot of the weights before this update; passed to db.step below.
    prev_state = copy.deepcopy(network.state_dict())
    optimizer.zero_grad()
    output = network(data)
    loss = F.nll_loss(output, target)
    # create_graph=True keeps the gradients themselves differentiable.
    loss.backward(create_graph=True, retain_graph=True)
    # Flatten each gradient and clone it. A bare view would share storage with
    # param.grad, which optimizer.zero_grad() zeroes in place on PyTorch
    # versions that do not reset gradients to None, wiping whatever db kept
    # from this step. clone() stays attached to the autograd graph.
    grads = [param.grad.view(-1).clone() for param in network.parameters()]
    optimizer.step()
    db.step(epoch, batch_idx, prev_state, network, grads, loss.item())
    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, batch_idx * len(data), len(train_loader.dataset),
        100. * batch_idx / len(train_loader), loss.item()))
      train_losses.append(loss.item())
      train_counter.append(
        (batch_idx*batch_size_train) + ((epoch-1)*len(train_loader.dataset)))

def test():
  """Evaluate ``network`` on ``test_loader`` and record the average loss.

  Puts the network in eval mode, sums the negative log-likelihood over all
  test samples, divides by the dataset size, appends the result to
  ``test_losses`` and prints loss and accuracy.
  """
  network.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      output = network(data)
      # reduction='sum' replaces the deprecated size_average=False; the
      # per-batch sums are averaged over the whole dataset below.
      test_loss += F.nll_loss(output, target, reduction='sum').item()
      pred = output.argmax(dim=1, keepdim=True)
      # .item() keeps `correct` a plain int rather than a 0-dim tensor.
      correct += pred.eq(target.view_as(pred)).sum().item()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


# Evaluate once before any training, then alternate train/evaluate per epoch.
test()
n_epochs = 1  # overrides the n_epochs assigned earlier in the notebook
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()
#db.finnetwork = network

In [None]:
# Tables exposed by db after training.
table1 = db.tweight
table3 = db.tdiffnorm
table2 = db.tnorm

# Weight tensors to plot from each table.
weight_columns = ['conv1.weight', 'conv2.weight', 'fc1.weight', 'fc2.weight']
# The first row of the diff table is skipped.
table3[weight_columns][1:].plot()
table2[weight_columns].plot()

In [3]:
#db.ithdiffnorm('conv1.weight',2,2)
#db.tweight
# Set the loss function on db, then call its Hessian eigenvalue routine.
# NOTE(review): the arguments are presumably (epoch, batch index) — confirm in dbclass.
db.criterion = torch.nn.functional.nll_loss
db.ithhess_eigenval(1,1)



[hessian_eigenthings] beginning deflated power iteration
[hessian_eigenthings] computing eigenvalue/vector 1 of 10
tensor([ 2.8287,  3.1428,  3.0940,  ..., -6.2758,  1.2969,  3.3169]) tensor([1., 1., 1.,  ..., 1., 1., 1.])
Step 0 - Time taken = 146 seconds
tensor([ 0.0266,  0.0307,  0.0337,  ..., -0.1043,  0.0110,  0.0690]) tensor([ 0.0189,  0.0210,  0.0207,  ..., -0.0420,  0.0087,  0.0222])                                            1/20 
Step 1 - Time taken = 152 seconds
tensor([ 0.0139,  0.0185,  0.0248,  ..., -0.1225,  0.0047,  0.0962]) tensor([ 0.0124,  0.0144,  0.0158,  ..., -0.0487,  0.0052,  0.0322])                                            2/20 
Step 2 - Time taken = 143 seconds
tensor([ 0.0022,  0.0061,  0.0139,  ..., -0.1363, -0.0018,  0.1213]) tensor([ 0.0059,  0.0078,  0.0105,  ..., -0.0519,  0.0020,  0.0408])                                            3/20 
Step 3 - Time taken = 143 seconds

KeyboardInterrupt: 

In [None]:
# Work on a private deep copy of the model currently held by db.
network2 = copy.deepcopy(db.currnetwork)

# Accumulate gradients over every batch of db.dataloader. .grad is cleared once
# up front and never between batches, so each backward() adds to it.
# create_graph=True keeps the accumulated gradients differentiable.
network2.zero_grad()
for batch_idx, (data, target) in enumerate(db.dataloader):
    output = network2(data)
    loss2 = db.criterion(output, target)
    loss2.backward(create_graph=True, retain_graph=True)

# One flat view per parameter of the accumulated gradient.
grads = [param.grad.view(-1) for param in network2.parameters()]


In [None]:
# Collect each parameter's accumulated gradient in its original shape.
grads = [param.grad for param in network2.parameters()]

In [None]:
# Print the shape of each collected gradient tensor.
for g in grads:
    print(g.shape)

In [None]:
from hvp import *

# Build an operator from the copied model and its collected gradients.
# NOTE(review): HessianOperator is presumably provided by the wildcard import
# from hvp — confirm, and prefer importing it by name.
hess2 = HessianOperator(network2, grads)
#self.lgrad[self.genind(epoch,batch_id)]
#eigenvalue_analysis(hess,k=1,max_iter=20)

In [None]:
from hessian_eigenthings_orig.hvp_operator import compute_hessian_eigenthings

criterion = torch.nn.NLLLoss()
# NOTE(review): the upstream pytorch-hessian-eigenthings API returns
# (eigenvals, eigenvecs); the target names here look swapped. This imports a
# local fork (hessian_eigenthings_orig), so confirm its return order.
eigvec,eigenvalues = compute_hessian_eigenthings(network,train_loader, criterion, full_dataset=False,use_gpu=False)

In [None]:
#network = db.finnetwork
from operator import add
grads = None
def train2(epoch):
    """Pass over ``train_loader`` once, feeding ``db`` a running sum of gradients.

    Each batch's gradient is obtained with ``torch.autograd.grad`` and added
    element-wise to the sum of all previous batches; that running sum (a tuple
    after the first batch, a list afterwards) is what ``db.step`` receives.
    Batch ids reported to ``db`` are offset by 1874. The train/eval mode of
    ``network`` is left as the caller set it.

    Args:
        epoch: 1-based epoch number; used in the log line, in the
            ``train_counter`` offset and passed through to ``db.step``.
    """
    running_grads = None
    for batch_idx, (data, target) in enumerate(train_loader):
        # Snapshot of the weights before optimizer.step(); passed to db.step.
        prev_state = copy.deepcopy(network.state_dict())
        optimizer.zero_grad()
        output = network(data)
        loss = F.nll_loss(output, target)
        batch_grads = torch.autograd.grad(
            loss, network.parameters(), create_graph=True, retain_graph=True)
        if running_grads is None:
            running_grads = batch_grads
        else:
            running_grads = list(map(add, batch_grads, running_grads))
        # NOTE(review): torch.autograd.grad returns the gradients without
        # writing them to param.grad, so this step has no fresh gradient to
        # apply — confirm whether a weight update is intended here.
        optimizer.step()
        db.step(epoch, 1874 + batch_idx, prev_state, network,
                running_grads, loss.item())
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            train_losses.append(loss.item())
            train_counter.append(
                (batch_idx*batch_size_train) + ((epoch-1)*len(train_loader.dataset)))
    return

In [None]:
# Run train2 with epoch number 1.
train2(1)


In [None]:
# NOTE(review): the arguments are presumably (epoch, batch index) — confirm in dbclass.
db.ithhess_eigenval(1,2108)
#db.tnorm

In [4]:
# Rebuild the unshuffled MNIST training loader (batch size 64). This rebinds
# the notebook-level train_loader.
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('drive/My Drive/mnist/MNIST_data/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=64, shuffle=False)
# This import rebinds compute_hessian_eigenthings to the hessian_eigenthings2
# implementation for every cell run afterwards.
from hessian_eigenthings2.hvp_operator import compute_hessian_eigenthings

criterion = F.nll_loss
# Runs on net1, one of the deep copies of the freshly initialised network.
# NOTE(review): the upstream pytorch-hessian-eigenthings API returns
# (eigenvals, eigenvecs); the target names here look swapped. This is a local
# fork, so confirm its return order.
eigvec,eigenvalues = compute_hessian_eigenthings(net1,train_loader, criterion, use_gpu=False,power_iter_steps=20)

[hessian_eigenthings] beginning deflated power iteration
[hessian_eigenthings] computing eigenvalue/vector 1 of 10




tensor([ 2.7872,  3.0559,  2.9772,  ..., -6.3063,  1.2135,  3.3449]) tensor([1., 1., 1.,  ..., 1., 1., 1.])
Step 0 - Time taken = 82 seconds
tensor([ 0.0261,  0.0303,  0.0334,  ..., -0.1044,  0.0124,  0.0680]) tensor([ 0.0190,  0.0208,  0.0203,  ..., -0.0429,  0.0083,  0.0228])                                            1/20 
Step 1 - Time taken = 77 seconds
tensor([ 0.0125,  0.0169,  0.0235,  ..., -0.1232,  0.0061,  0.0981]) tensor([ 0.0122,  0.0141,  0.0156,  ..., -0.0487,  0.0058,  0.0318])                                            2/20 
Step 2 - Time taken = 78 seconds
tensor([ 0.0005,  0.0037,  0.0112,  ..., -0.1350, -0.0019,  0.1268]) tensor([ 0.0053,  0.0072,  0.0100,  ..., -0.0523,  0.0026,  0.0417])                                            3/20 
Step 3 - Time taken = 79 seconds
tensor([-0.0093, -0.0069,  0.0013,  ..., -0.1451, -0.0092,  0.1499]) tensor([ 0.0002,  0.0015,  0.0045,  ..., -0.0539, -0.0008,  0.0506])                                            4/20 
Step 4 - Tim

KeyboardInterrupt: 

In [None]:
# Rebuild the unshuffled MNIST training loader (batch size 64). This rebinds
# the notebook-level train_loader.
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('drive/My Drive/mnist/MNIST_data/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=64, shuffle=False)
# This import rebinds compute_hessian_eigenthings to the hessian_eigenthings_orig
# implementation for every cell run afterwards.
from hessian_eigenthings_orig.hvp_operator import compute_hessian_eigenthings

criterion = F.nll_loss
# Runs on network2 (the deep copy of db.currnetwork) with full_dataset=True.
# NOTE(review): the upstream pytorch-hessian-eigenthings API returns
# (eigenvals, eigenvecs); the target names here look swapped. This is a local
# fork, so confirm its return order.
eigvec,eigenvalues = compute_hessian_eigenthings(network2,train_loader, criterion, full_dataset=True,use_gpu=False,power_iter_steps=100)

In [None]:
from power_iter import Operator, deflated_power_iteration, smallest_eigenvalue
from scipy.sparse.linalg import LinearOperator, eigsh
import scipy
def eigenvalue_analysis2(operator, k=1, tol=1e-6, max_iter=100, quiet=False):
    """Return largest EV in magnitude and smallest algebraic eigenvalue.

    Thin wrapper around ``smallest_eigenvalue`` that runs with zero momentum.

    Args:
        operator: object exposing ``hvp_op`` and ``device`` attributes.
        k: unused; kept so existing callers remain valid.
        tol: forwarded as ``power_iter_err_threshold``.
        max_iter: forwarded as ``power_iter_steps``.
        quiet: forwarded as ``quiet``. It used to be ignored in favour of a
            hard-coded ``False``; the default keeps that behaviour.

    Returns:
        Tuple ``(eigmax, eigmin)`` as produced by ``smallest_eigenvalue``.
    """
    eigmax, eigmin = smallest_eigenvalue(
        operator.hvp_op,
        power_iter_steps=max_iter,
        power_iter_err_threshold=tol,
        momentum=0.0,
        device=operator.device,
        quiet=quiet,
    )
    return eigmax, eigmin

In [None]:
# Extreme eigenvalues for hess2, capped at 20 power-iteration steps.
eigmax, eigmin = eigenvalue_analysis2(hess2,max_iter=20)