Default to float64 for CPU tests because of numerical instability on CPU
tfjgeorge committed Sep 15, 2020
1 parent d114ae5 commit 6cfb3b1
Showing 4 changed files with 56 additions and 27 deletions.
9 changes: 9 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,9 @@
+import torch
+import pytest
+
+
+@pytest.fixture(scope="session", autouse=True)
+def default_to_float64_on_cpu(request):
+    if not torch.cuda.is_available():
+        torch.set_default_dtype(torch.float64)
+        torch.set_default_tensor_type(torch.DoubleTensor)
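
A minimal sketch (editor's illustration, not part of the commit) of the failure mode this fixture guards against: float32 accumulation on CPU leaves absolute errors far larger than float64 does, enough to break tight comparisons between two mathematically identical quantities.

import torch

torch.manual_seed(0)
a = torch.randn(1000, 1000, dtype=torch.float32)

# the same Gram matrix, accumulated in float32 vs float64
gram32 = (a @ a.t()).double()
gram64 = a.double() @ a.double().t()

# typically somewhere around 1e-4 to 1e-3 in absolute terms: enough to
# fail a tight allclose-style check even though both results are "correct"
print((gram32 - gram64).abs().max())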
22 changes: 15 additions & 7 deletions tests/tasks.py
@@ -12,9 +12,17 @@
 
 if torch.cuda.is_available():
     device = 'cuda'
+
+    def to_device(tensor):
+        return tensor.to(device)
 else:
     device = 'cpu'
+
+    # on cpu we need to use double as otherwise ill-conditioning in sums
+    # causes numerical instability
+    def to_device(tensor):
+        return tensor.double()
 
 class FCNet(nn.Module):
     def __init__(self, in_size=10, out_size=10, n_hidden=2, hidden_size=15,
                        nonlinearity=nn.ReLU, normalization='none'):
@@ -90,7 +98,7 @@ def get_linear_fc_task():
     net.to(device)
 
     def output_fn(input, target):
-        return net(input.to(device))
+        return net(to_device(input))
 
     layer_collection = LayerCollection.from_model(net)
     return (train_loader, layer_collection, net.parameters(),
@@ -122,7 +130,7 @@ def get_linear_conv_task():
     net.to(device)
 
     def output_fn(input, target):
-        return net(input.to(device))
+        return net(to_device(input))
 
     layer_collection = LayerCollection.from_model(net)
     return (train_loader, layer_collection, net.parameters(),
@@ -156,7 +164,7 @@ def get_batchnorm_fc_linear_task():
     net.to(device)
 
     def output_fn(input, target):
-        return net(input.to(device))
+        return net(to_device(input))
 
     lc_full = LayerCollection.from_model(net)
     layer_collection = LayerCollection()
@@ -197,7 +205,7 @@ def get_batchnorm_conv_linear_task():
     net.to(device)
 
     def output_fn(input, target):
-        return net(input.to(device))
+        return net(to_device(input))
 
     lc_full = LayerCollection.from_model(net)
     layer_collection = LayerCollection()
@@ -249,7 +257,7 @@ def get_batchnorm_nonlinear_task():
     net.to(device)
 
     def output_fn(input, target):
-        return net(input.to(device))
+        return net(to_device(input))
 
     layer_collection = LayerCollection.from_model(net)
     return (train_loader, layer_collection, net.parameters(),
@@ -274,7 +282,7 @@ def get_fullyconnect_task(normalization='none'):
     net.to(device)
 
     def output_fn(input, target):
-        return net(input.to(device))
+        return net(to_device(input))
 
     layer_collection = LayerCollection.from_model(net)
     return (train_loader, layer_collection, net.parameters(),
@@ -296,7 +304,7 @@ def get_conv_task(normalization='none'):
     net.to(device)
 
     def output_fn(input, target):
-        return net(input.to(device))
+        return net(to_device(input))
 
     layer_collection = LayerCollection.from_model(net)
     return (train_loader, layer_collection, net.parameters(),
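
A hedged sketch (names hypothetical, not from the diff) of why the cpu branch of to_device casts rather than moves: with the conftest fixture active, module parameters are created in float64, so float32 batches coming out of a DataLoader must be cast before the forward pass or PyTorch raises a dtype-mismatch error.

import torch
import torch.nn as nn

torch.set_default_dtype(torch.float64)   # what tests/conftest.py does on cpu

net = nn.Linear(10, 2)                   # parameters now default to float64
x32 = torch.randn(5, 10, dtype=torch.float32)  # DataLoaders typically yield float32

# net(x32) would raise a dtype-mismatch error; casting the input, as
# to_device() does on cpu, keeps inputs and parameters consistent:
y = net(x32.double())
print(y.dtype)  # torch.float64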
4 changes: 2 additions & 2 deletions tests/test_jacobian_ekfac.py
@@ -1,7 +1,7 @@
 from nngeometry.generator.jacobian import Jacobian
 from nngeometry.object.pspace import PMatBlockDiag, PMatKFAC, PMatEKFAC
 import torch
-from tasks import get_fullyconnect_task, get_conv_task
+from tasks import get_fullyconnect_task, get_conv_task, device
 from nngeometry.object.vector import random_pvector
 from utils import check_ratio, check_tensors

@@ -55,7 +55,7 @@ def test_pspace_ekfac_vs_direct():
                          n_output=n_output)
 
     M_ekfac = PMatEKFAC(generator)
-    v = random_pvector(lc, device='cuda')
+    v = random_pvector(lc, device=device)
 
     # the second time we will have called update_diag
     for i in range(2):
48 changes: 30 additions & 18 deletions tests/test_jacobian_kfac.py
@@ -13,6 +13,20 @@
 from tasks import get_fullyconnect_task
 
 
+if torch.cuda.is_available():
+    device = 'cuda'
+
+    def to_device(tensor):
+        return tensor.to(device)
+else:
+    device = 'cpu'
+
+    # on cpu we need to use double as otherwise ill-conditioning in sums
+    # causes numerical instability
+    def to_device(tensor):
+        return tensor.double()
+
+
 class Net(nn.Module):
     def __init__(self, in_size=10, out_size=10, n_hidden=2, hidden_size=25,
                        nonlinearity=nn.ReLU):
@@ -62,24 +76,33 @@ def forward(self, x):
 def get_fullyconnect_kfac_task(bs=300):
     train_set = get_dataset('train')
     train_set = Subset(train_set, range(1000))
-    train_set = to_onexdataset(train_set, 'cuda')
+    train_set = to_onexdataset(train_set, device)
     train_loader = DataLoader(
         dataset=train_set,
         batch_size=bs,
         shuffle=False)
 
     net = Net(in_size=10)
-    net.to('cuda')
+    net.to(device)
 
     def output_fn(input, target):
-        input = input.to('cuda')
-        return net(input)
+        return net(to_device(input))
 
     layer_collection = LayerCollection.from_model(net)
     return (train_loader, layer_collection, net.parameters(), net,
             output_fn, 10)
 
 
+def to_onexdataset(dataset, device):
+    # this weird dataset only uses a single input x repeated, it is only
+    # designed to test kfac since in this case KFAC and regular Fisher
+    # are the same
+    loader = torch.utils.data.DataLoader(dataset, len(dataset))
+    x, t = next(iter(loader))
+    x = x[0, :].repeat(x.size(0), 1)
+    return torch.utils.data.TensorDataset(x.to(device), t.to(device))
+
+
 def get_convnet_kfc_task(bs=300):
     train_set = datasets.MNIST(root=default_datapath,
                                train=True,
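
Why a single repeated input makes KFAC exact (an editor's gloss using standard KFAC algebra, not part of the commit): for a layer with input activation a and backpropagated output gradient g, the exact Fisher block and its KFAC approximation are

F_{\text{exact}} = \mathbb{E}\left[(g g^{\top}) \otimes (a a^{\top})\right],
\qquad
F_{\text{KFAC}} = \mathbb{E}\left[g g^{\top}\right] \otimes \mathbb{E}\left[a a^{\top}\right].

When every sample shares the same input, a a^{\top} is a constant and factors out of the expectation:

\mathbb{E}\left[(g g^{\top}) \otimes (a a^{\top})\right]
  = \mathbb{E}\left[g g^{\top}\right] \otimes (a a^{\top}) = F_{\text{KFAC}},

so on this dataset KFAC coincides with the regular Fisher, which is what the to_onexdataset comment relies on.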
@@ -91,27 +114,16 @@ def get_convnet_kfc_task(bs=300):
         batch_size=bs,
         shuffle=False)
     net = ConvNet()
-    net.to('cuda')
+    net.to(device)
 
     def output_fn(input, target):
-        input = input.to('cuda')
-        return net(input)
+        return net(to_device(input))
 
     layer_collection = LayerCollection.from_model(net)
     return (train_loader, layer_collection, net.parameters(), net,
             output_fn, 10)
 
 
-def to_onexdataset(dataset, device):
-    # this weird dataset only uses a single input x repeated, it is only
-    # designed to test kfac since in this case KFAC and regular Fisher
-    # are the same
-    loader = torch.utils.data.DataLoader(dataset, len(dataset))
-    x, t = next(iter(loader))
-    x = x[0, :].repeat(x.size(0), 1)
-    return torch.utils.data.TensorDataset(x.to(device), t.to(device))
-
-
 def test_jacobian_kfac_vs_pblockdiag():
     """
     Compares blockdiag and kfac representation on datasets/architectures
@@ -162,7 +174,7 @@ def test_jacobian_kfac():
                           M_kfac.get_diag(split_weight_bias=True))
 
     # sample random vector
-    random_v = random_pvector(lc, 'cuda')
+    random_v = random_pvector(lc, device)
 
     # Test mv
     mv_direct = torch.mv(G_kfac_split, random_v.get_flat_representation())
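
The mv test above checks a structured representation's matrix-vector product against multiplication by its dense form. A sketch of the same check in plain torch (stand-alone illustration, not nngeometry's API), using the Kronecker identity (A ⊗ B) vec(X) = vec(B X Aᵀ) that KFAC-style representations exploit to avoid materializing the dense matrix:

import torch

torch.manual_seed(0)
A = torch.randn(4, 4, dtype=torch.float64)
B = torch.randn(3, 3, dtype=torch.float64)
v = torch.randn(12, dtype=torch.float64)

def kron_mv(A, B, v):
    # (A kron B) @ v without forming the dense Kronecker product,
    # using column-major vec/unvec
    X = v.reshape(A.size(1), B.size(1)).t()
    return (B @ X @ A.t()).t().reshape(-1)

dense = torch.kron(A, B)  # dense counterpart, playing the role of G_kfac
assert torch.allclose(kron_mv(A, B, v), torch.mv(dense, v))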
