In [2]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
if torch.cuda.is_available():
    cuda = torch.device('cuda')

In [3]:
 class DGP(nn.Module):
    def __init__(self, dim_in, dim_h, dim_out, num_induce, dim_z):
        super(DGP, self).__init__()
        self.dim_in = dim_in
        self.dim_h = dim_h + [dim_out] #출력층 차원수도 포함
        self.dim_out = dim_out
        self.num_induce = num_induce
        self.dim_z = dim_z
        
        #커널 하이퍼 파라미터 [layer][dim][Precision/Scaler]
        self.ARD_KHP = {}
        for ind, width in enumerate(self.dim_h):
            self.ARD_KHP[ind] = {}
            for dim in range(width):
                self.ARD_KHP[ind][dim] = {}
                self.ARD_KHP[ind][dim][0] = torch.randn(1, requires_grad=True).cuda() #Precision
                self.ARD_KHP[ind][dim][1] = torch.randn(1, requires_grad=True).cuda() #Scaler
        
        #ARD 파라미터 [layer][dim]
        self.ARD_weight = {}
        prev_dim = dim_in
        for ind, width in enumerate(self.dim_h):
            self.ARD_weight[ind] = {}
            for dim in range(width):
                self.ARD_weight[ind][dim] = torch.ones((prev_dim, 1), requires_grad=True).cuda() #이전 층의 차원 수 만큼의 ARD 파라미터를 가짐.
            prev_dim = width
        
        #Inducing point varational parameter initializing용
        self.normal_dist = torch.distributions.MultivariateNormal(torch.zeros(self.num_induce), torch.eye(self.num_induce))
        
        #q(Z) [dim_z][mean/logvar][num_induce]
        self.q_z = {}
        for ind in range(self.dim_z):
            ind = "dim " + str(ind)
            self.q_z[ind] = {}
            #mean vector
            self.q_z[ind][0] = self.normal_dist.sample().squeeze().cuda()
            self.q_z[ind][0].require_grad = True
            #log var vector for making diagonal cov mat
            #assume diagonal cov mat for each z dim
            self.q_z[ind][1] = self.normal_dist.sample().squeeze().cuda()
            self.q_z[ind][1].require_grad = True
        
        #q(X) [dim_h][width][mean/logvar][]
        self.q_x = {}
        for ind, width in enumerate(self.dim_h):
            if ind == len(self.dim_h) - 1:
                ind = "out"
            else:
                ind = "hidden " + str(ind)            
            self.q_x[ind] = {}
            for dim in range(width):
                dim = "dim " + str(dim)
                #mean vector
                self.q_x[ind][dim] = {}
                self.q_x[ind][dim][0] = self.normal_dist.sample().squeeze()
                self.q_x[ind][dim][0].require_grad = True
                #log var vector for making diagonal cov mat
                #assume diagonal cov mat for each z dim
                self.q_x[ind][dim][1] = self.normal_dist.sample().squeeze()
                self.q_x[ind][dim][1].require_grad = True
        
        
        
    def update(self):
        for ind, weight in enumerate(self.dim_h):
            self.inducing_gram[ind] = self.gen_kernel()

    def kernel(self, x1, x2, layer):
        #ARD squared exponential kernel
        res = torch.empty(0, device=cuda)
        for dim in range(self.dim_h[layer]):
            res = torch.cat((res, self.ARD_KHP * torch.exp(-0.5*self.ARD_weight[layer][dim]*((x1 - x2)**2))))
        return res
    
    def gen_gram(self, X, layer):
        with torch.cuda.device(0):
            gram = torch.zeros(self.dim_h[layer], X.shape[0], X.shape[0])
            for i in range(X.shape[0]):
                for j in range(i + 1, X.shape[0]):
                    gram[:, i, j] = self.kernel(X[i,:], X[j,:], layer)
            gram += gram.transpose(1, 2)
        return gram
"""  
    def predict

    def loss(self, pred, true):
        #논문에 적힌 Variational Lower Bound 구현
        g_Y = 
        r_X = 
        ent_q = torch.zero(1)
        for layer, width in enumerate(dim_h):
            ent_q += 0.5 * (width * (1 + torch.log(2 * np.pi)) + torch.log(torch.det()))
        KLD = torch.sum(self.q_z_var + (self.q_z_mean ** 2) - torch.log(self.q_z_var) - 1)
        return g_Y + r_X + ent_q - KLD
    
    def forward(self, x):
        return 
"""
model = DGP(3, [5, 4], 3, 10, 10).cuda()

In [4]:
model.q_x

{'hidden 0': {'dim 0': {0: tensor([ 1.7544, -0.9738,  2.2291,  1.6986, -0.3788,  0.5093,  1.5721,  0.7334,
           -0.0913,  0.7867]),
   1: tensor([ 0.2453,  0.5409,  1.2960, -1.0988, -1.6336, -0.2094, -0.8414,  1.5037,
            1.0400,  0.2447])},
  'dim 1': {0: tensor([ 0.0023, -0.1311, -1.1004, -0.4292, -0.9047,  0.9575,  1.0021,  0.3088,
           -0.8963, -0.8142]),
   1: tensor([ 0.9734,  0.6366, -0.3404, -0.9988, -0.3845,  0.5376,  2.5069,  0.1145,
            0.5485,  1.8045])},
  'dim 2': {0: tensor([ 0.6291,  1.1244,  1.6941, -0.8975, -0.2336,  0.5612, -0.0400, -0.1889,
           -0.7147, -0.0330]),
   1: tensor([-1.4685,  1.1591, -0.9319,  1.4161,  0.1267, -0.6124,  1.0211,  1.0955,
           -0.4380, -0.5487])},
  'dim 3': {0: tensor([ 0.8054,  0.5660, -1.0774,  0.3816,  0.2961,  2.2559, -0.7331, -0.1979,
           -1.1274,  0.9697]),
   1: tensor([-0.4184,  0.8969,  0.9777, -1.5054,  0.5407,  1.2159, -0.7630, -1.0303,
            1.0611,  1.0992])},
  'dim 4': {

In [5]:
a = torch.ones(3, 3).cuda()
model.gen_gram(a, 0)

TypeError: mul() received an invalid combination of arguments - got (dict), but expected one of:
 * (Tensor other)
      didn't match because some of the arguments have invalid types: ([31;1mdict[0m)
 * (Number other)
      didn't match because some of the arguments have invalid types: ([31;1mdict[0m)


In [30]:
x = torch.ones(1).detach()
b = torch.randn(5, requires_grad=True)
print(x)
print(b)

y = x - b
loss = torch.sum(y**2)
loss.backward()

optimizer = optim.SGD([b], 0.5)
optimizer.step()
print(x)
print(b)


tensor([1.])
tensor([ 0.2573,  1.5947, -0.2132,  0.6438, -1.7222], requires_grad=True)
tensor([1.])
tensor([1., 1., 1., 1., 1.], requires_grad=True)


In [None]:
torch.cat(torch,)