In [1]:
import gpytorch
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torchvision import transforms

gpytorch.functions.use_toeplitz = False

In [107]:
class FeatureExtractor(nn.Sequential):
    
    def __init__(self):
        super(FeatureExtractor, self).__init__(nn.Conv2d(1, 32, kernel_size=5, padding=2),
                                 nn.BatchNorm2d(32),
                                 nn.ReLU(),
                                 nn.MaxPool2d(2, 2),
                                 nn.Conv2d(32, 64, kernel_size=5, padding=2),
                                 nn.BatchNorm2d(64),
                                 nn.ReLU(),
                                 nn.MaxPool2d(2, 2))
        
class Bottleneck(nn.Sequential):
    
    def __init__(self):
        super(Bottleneck, self).__init__(nn.Linear(64*7*7, 128),
                                         nn.BatchNorm1d(128),
                                 nn.ReLU(),
                                 nn.Linear(128, 128),
                                 nn.BatchNorm1d(128),
                                 nn.ReLU(),
                                 nn.Linear(128,2),
                                 nn.BatchNorm1d(2))

class LeNet(nn.Module):
    
    def __init__(self):
        super(LeNet, self).__init__()
        self.feature_extractor = FeatureExtractor()
        self.bottleneck = Bottleneck()
        self.final_layer = nn.Sequential(
                                 nn.ReLU(),
                                 nn.Linear(2,10))
    
    def forward(self, x):
        features = self.feature_extractor(x)
        bottlenecked_features = self.bottleneck(features.view(-1, 64 * 7 * 7))
        classification = self.final_layer(bottlenecked_features)
        return classification
        

In [108]:
train_mnist = torchvision.datasets.MNIST('/tmp', train=True,
                                         download=True, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))                                              
test_mnist = torchvision.datasets.MNIST('/tmp', train=False,
                                        download=True, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))

In [144]:
train_data_loader = torch.utils.data.DataLoader(train_mnist, shuffle=True, pin_memory=True, batch_size=256)

In [145]:
criterion = nn.CrossEntropyLoss().cuda()

In [146]:
model = LeNet().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [147]:
num_epochs = 3
for i in range(num_epochs):
    for x, y in train_data_loader:
        x = Variable(x.cuda())
        y = Variable(y.cuda())
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
    print("Loss: ", loss.data[0])
    

('Loss: ', 0.6452860832214355)
('Loss: ', 0.7517046332359314)
('Loss: ', 1.1878867149353027)


In [153]:
list(model.bottleneck.modules())[-1].weight.data.fill_(1)


 1
 1
[torch.cuda.FloatTensor of size 2 (GPU 0)]

In [149]:
from gpytorch.kernels import RBFKernel, GridInterpolationKernel

class DeepKernel(gpytorch.Module):
    def __init__(self, model):
        super(DeepKernel, self).__init__()
        self.feature_extractor = model.feature_extractor
        self.bottleneck = model.bottleneck
        self.gp_layer = GPLayer()
        
    def forward(self, x):
        print('starting forward')
        features = self.feature_extractor(x)
        bottlenecked_features = self.bottleneck(features.view(-1, 64 * 7 * 7))
        print('gp layer')
        gp_output = self.gp_layer(bottlenecked_features)
        print('done')
        return gp_output
    
    
class LatentFunction(gpytorch.GridInducingPointModule):
    def __init__(self):
        super(LatentFunction, self).__init__(grid_size=100, grid_bounds=[(-5, 10)])
        cov_module = RBFKernel()
        cov_module.initialize(log_lengthscale=2)
        self.cov_module = cov_module
        
    def forward(self, x):
        mean = Variable(x.data.new(len(x)).zero_())
        covar = self.cov_module(x)
        return gpytorch.random_variables.GaussianRandomVariable(mean, covar)

    
class GPLayer(gpytorch.GPModel):
    def __init__(self, n_dims=2):
        super(GPLayer, self).__init__(gpytorch.likelihoods.BernoulliLikelihood())
        self.latent_functions = [LatentFunction() for i in xrange(n_dims)]
        for i, latent_function in enumerate(self.latent_functions):
            self.add_module('latent_function_%d' % i, latent_function)
    
    def forward(self, x):
        outputs = [latent_function(x[:, i].contiguous()) for i, latent_function in enumerate(self.latent_functions)]
        res = sum(outputs)
        return res
    

In [150]:
deep_kernel = DeepKernel(model).cuda()

In [151]:
gp_data_loader = torch.utils.data.DataLoader(train_mnist, batch_size=128, pin_memory=True)

In [154]:
# Find optimal model hyperparameters
deep_kernel.train()
optimizer = torch.optim.Adam(deep_kernel.gp_layer.parameters(), lr=0.01)
optimizer.n_iter = 0
for i in range(20):
    for j, (train_x_batch, train_y_batch) in enumerate(gp_data_loader):
        train_x_batch = Variable(train_x_batch).cuda()
        train_y_batch = Variable(train_y_batch.fmod(2)).cuda()
        optimizer.zero_grad()
        output = deep_kernel(train_x_batch)
        loss = -deep_kernel.gp_layer.marginal_log_likelihood(output, train_y_batch.float())
        loss.backward()
        optimizer.n_iter += 1
        print('Iter %d/200 - Loss: %.3f' % (
            i + 1, loss.data[0],
        ))
        optimizer.step()
    
# Set back to eval mode
model.eval()
None

starting forward
gp layer
done
Iter 1/200 - Loss: 407072.656
starting forward
gp layer
done
Iter 1/200 - Loss: 382232.250
starting forward
gp layer
done
Iter 1/200 - Loss: 354490.812
starting forward
gp layer
done
Iter 1/200 - Loss: 327162.688
starting forward
gp layer
done
Iter 1/200 - Loss: 286836.312
starting forward
gp layer
done
Iter 1/200 - Loss: 307030.531
starting forward
gp layer
done
Iter 1/200 - Loss: 217781.125
starting forward
gp layer
done
Iter 1/200 - Loss: 154102.422
starting forward
gp layer
done
Iter 1/200 - Loss: 123153.773
starting forward
gp layer
done
Iter 1/200 - Loss: 157142.000
starting forward
gp layer
done
Iter 1/200 - Loss: 150595.266
starting forward
gp layer
done
Iter 1/200 - Loss: 105468.281
starting forward
gp layer
done
Iter 1/200 - Loss: 127705.180
starting forward
gp layer
done
Iter 1/200 - Loss: 65342.324
starting forward
gp layer
done
Iter 1/200 - Loss: 71181.867
starting forward
gp layer
done
Iter 1/200 - Loss: 71344.164
starting forward
gp layer
d

KeyboardInterrupt: 

In [155]:
deep_kernel.gp_layer.condition(Variable(torch.randn(2, 2)).cuda(), Variable(torch.zeros(2)).cuda())

GPLayer (
  (likelihood): BernoulliLikelihood (
  )
  (latent_function_0): LatentFunction (
    (cov_module): GridInterpolationKernel (
      (base_kernel_module): RBFKernel (
      )
    )
  )
  (latent_function_1): LatentFunction (
    (cov_module): GridInterpolationKernel (
      (base_kernel_module): RBFKernel (
      )
    )
  )
)

In [156]:
deep_kernel.eval()
test_data_loader = torch.utils.data.DataLoader(test_mnist, shuffle=False, pin_memory=True, batch_size=256)

for test_batch_x, test_batch_y in test_data_loader:
    predictions = deep_kernel(Variable(test_batch_x).cuda()).probability.round()
    test_batch_y = Variable(test_batch_y.fmod(2)).cuda().float()
    print(torch.eq(predictions, test_batch_y).float().mean().data[0])


starting forward
gp layer
done
0.6171875
starting forward
gp layer
done
0.6015625
starting forward
gp layer
done
0.52734375
starting forward
gp layer
done
0.578125
starting forward
gp layer
done
0.578125
starting forward
gp layer
done
0.5703125
starting forward
gp layer
done
0.6171875
starting forward
gp layer
done
0.58203125
starting forward
gp layer
done
0.58984375
starting forward
gp layer
done
0.578125
starting forward
gp layer
done
0.61328125
starting forward
gp layer
done
0.62109375
starting forward
gp layer
done
0.55859375
starting forward
gp layer
done
0.546875
starting forward
gp layer
done
0.59765625
starting forward
gp layer


KeyboardInterrupt: 

In [81]:
from torch.nn import functional as F

model.eval()
test_data_loader = torch.utils.data.DataLoader(test_mnist, shuffle=False, pin_memory=True, batch_size=256)

for test_batch_x, test_batch_y in test_data_loader:
    _, predictions = model(Variable(test_batch_x).cuda()).max(dim=1)
    test_batch_y = Variable(test_batch_y).cuda()
    print(torch.eq(predictions, test_batch_y).float().mean().data[0])


0.9921875
0.96484375
0.9765625
0.984375
0.9765625
0.984375
0.9609375
0.97265625
0.95703125
0.97265625
0.98046875
0.96875
0.98828125
0.97265625
0.9609375
0.9765625
0.9609375
0.98046875
0.97265625
0.9765625
1.0
0.9921875
0.9921875
0.9765625
0.984375
0.96484375
0.98828125
1.0
0.99609375
1.0
0.9765625
0.96875
0.98828125
0.9921875
1.0
0.984375
0.99609375
0.9765625
0.9765625
1.0
