# GPU vs. CPU Running Time Test

We investigate the running time of GPytorch on CPU and GPU for training (150 iterations) and computing predictive distributions. For predictive distribution we also investigate the effect of LOVE approximation, so the scenarios are:
- CPU Exact
- GPU Exact
- CPU with LOVE without cache
- GPU with LOVE without cache
- CPU with LOVE with cache
- GPU with LOVE with cache

We use a customized Multitask Kernel with variable number of Kronecker Product, the same Kernel as used in Targeted Adaptive Design. We use 4 dimensional input and output.

The training and testing data are generated using sin and cos functions, etc.


In [1]:
import math
import torch
import gpytorch
from matplotlib import pyplot as plt
# from Data_Gen_Script import VField
import numpy as np
from scipy.stats import uniform

import time

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from torch import nn
from torch import Tensor
from gpytorch.kernels import MultitaskKernel
from gpytorch.constraints import Positive




class TensorProductKernel(MultitaskKernel):
    """
    Class to get the tensorproduct kernel
    """

    def __init__(self, data_covar_module,  num_tasks, rank=1, pos_constraint = None, tri_constaint = None, task_covar_prior=None, **kwargs):
        super().__init__(data_covar_module, num_tasks, rank, task_covar_prior = None, **kwargs)

        
        
    def forward(self, x1, x2, diag=False, last_dim_is_batch=False, add_jitter = False, **params):
        if last_dim_is_batch:
            raise RuntimeError("MultitaskKernel does not accept the last_dim_is_batch argument.")
        covar_i = self.task_covar_module.covar_matrix #.evaluate()
            
        covar_i = covar_i.evaluate()
        if len(x1.shape[:-2]):
            covar_i = covar_i.repeat(*x1.shape[:-2], 1, 1)
        covar_x = gpytorch.lazy.lazify(self.data_covar_module.forward(x1, x2, **params))#(self.data_covar_module.forward(x1, x2, **params))#
        if (add_jitter == True):
            covar_x = covar_x #+ (1e-6) * torch.eye(covar_x.shape[0])
        res=gpytorch.lazy.KroneckerProductLazyTensor(covar_x, covar_i) #gpytorch.lazy.lazify(torch.kron(covar_x, covar_i))

        return res.diag() if diag else res
        
        
from copy import deepcopy
from typing import List, Optional, Union

from torch.nn import ModuleList

from gpytorch.priors import Prior
from gpytorch.kernels import Kernel
from gpytorch.kernels import IndexKernel
from gpytorch.constraints import Positive

# This is the main Kernel to use

class SepTensorProductKernel(Kernel):
    """
    Class to get the tensorproduct kernel
    """

    def __init__(
        self, base_kernels: List, num_tasks: int, rank: Union[int, List] = 1, 
        task_covar_prior: Optional[Prior] = None
    ):
        """
        Args:
            base_kernels (:type: list of `Kernel` objects): A list of base kernels.
            num_tasks (int): The number of output tasks to fit.
            rank (int): Rank of index kernel to use for task covariance matrix for each
                        of the base kernels.
            task_covar_prior (:obj:`gpytorch.priors.Prior`): Prior to use for each
                task kernel. See :class:`gpytorch.kernels.IndexKernel` for details.
        """
        if len(base_kernels) < 1:
            raise ValueError("At least one base kernel must be provided.")
        for k in base_kernels:
            if not isinstance(k, Kernel):
                raise ValueError("base_kernels must only contain Kernel objects")
        if not isinstance(rank, list):
            rank = [rank] * len(base_kernels)

        super(SepTensorProductKernel, self).__init__()
        self.covar_module_list = ModuleList(
            [
                TensorProductKernel(base_kernel, num_tasks=num_tasks, rank=r, task_covar_prior=task_covar_prior)
                for base_kernel, r in zip(base_kernels, rank)
            ]
        )

    def forward(self, x1, x2, **params):
        res = self.covar_module_list[0].forward(x1, x2, **params)
        for m in self.covar_module_list[1:]:
            res += m.forward(x1, x2, **params)
        return res

    def num_outputs_per_input(self, x1, x2):
        """
        Given `n` data points `x1` and `m` datapoints `x2`, this multitask kernel
        returns an `(n*num_tasks) x (m*num_tasks)` covariance matrix.
        """
        return self.covar_module_list[0].num_outputs_per_input(x1, x2)


    def __getitem__(self, index):
        new_kernel = deepcopy(self)
        new_kernel.covar_module_list = ModuleList(
            [base_kernel.__getitem__(index) for base_kernel in self.covar_module_list]
        )
        return new_kernel



In [3]:
"""
Model Initialization
"""

class MultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_base_kernels):
        super(MultitaskGPModel, self).__init__(train_x, train_y, likelihood)
        
        self.mean_module = gpytorch.means.MultitaskMean(
              gpytorch.means.ConstantMean(), num_tasks=Dval
        )
        
        base_kernels = []
        for i in range(num_base_kernels):
            base_kernels.append(gpytorch.kernels.ScaleKernel(( gpytorch.kernels.RBFKernel() ))) 
            #gpytorch.kernels.PolynomialKernel(4)  ##gpytorch.kernels.MaternKernel()# (vvk_rbf.vvkRBFKernel())
 
            
        self.covar_module = SepTensorProductKernel(base_kernels,num_tasks = Dval)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)

num_base_kernels = 8

In [4]:
cpu_training_time = []
cpu_exact_meancovar = []
cpu_love_meancovar = []
cpu_love_meancovar_cache = []

In [5]:
# cpu_size_vec = [100,300,500,700,1000,1500,2000]
# Nval = 4
# Dval = 4

# for size in cpu_size_vec:
#     print(f"data size: {size}")
#     """Set up the training and testing data"""
#     n = size # input size

# #     x = 5 * torch.rand(n, Dval)

# #     y = torch.stack([
# #         torch.sin(x[:, 0] * (2 * math.pi)) + torch.randn(n) * 0.02,
# #         torch.cos(x[:, 0] * (2 * math.pi)) + torch.cos(x[:, 2] * (2 * math.pi)) + torch.randn(n) * 0.02,
# #         torch.sin(x[:, 2] * (2 * math.pi)) + torch.cos(x[:, 1] * (2 * math.pi)) + torch.randn(n) * 0.02,
# #         (torch.cos(x[:, 3] * (2 * math.pi)))* (torch.sin(x[:, 0] * (2 * math.pi))) + torch.randn(n) * 0.02,
# #     ], -1)

#     x = 3 * torch.rand(n)
    
#     y = torch.stack([
#         torch.sin(3 * x) + torch.randn(n) * 0.01,
#         torch.cos(x) + torch.cos(2 * x) + torch.randn(n) * 0.01,
#         torch.sin(x) + torch.cos(x) + torch.randn(n) * 0.01,
#         torch.cos(x) * torch.cos(x) + torch.randn(n) * 0.01,
#     ], -1)

#     train_x = x[:int(0.8*n)]
#     train_y = y[:int(0.8*n)]

#     test_x = x[int(0.8*n): ]

#     test_y = y[int(0.8*n): ]

# #     # normalize features
# #     mean = train_x.mean()
# #     std = train_x.std() + 1e-6 # prevent dividing by 0
# #     train_x = (train_x - mean) / std
# #     test_x = (test_x - mean) / std

# #     # normalize labels
# #     mean, std = train_y.mean(),train_y.std()
# #     train_y = (train_y - mean) / std
# #     test_y = (test_y - mean) / std

    
    
#     likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=Dval)
#     model = MultitaskGPModel(train_x, train_y, likelihood, num_base_kernels)
    
#     start_time = time.time()
    
#     """train the model hyperparameters"""
#     training_iterations = 150

#     # Find optimal model hyperparameters
#     model.train()
#     likelihood.train()

#     # Use the adam optimizer
#     optimizer = torch.optim.Adam(model.parameters(), lr=0.05)  # Includes GaussianLikelihood parameters

#     # "Loss" for GPs - the marginal log likelihood
#     mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

#     for i in range(training_iterations):
#         optimizer.zero_grad()
#         output = model(train_x)
#         loss = -mll(output, train_y)
#         loss.backward()
# #           if(i > training_iterations*0.8):
#         print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
#         optimizer.step()
    
#     cpu_training_time.append(time.time() - start_time)
    
#     print()
    
#     """ Making predictions with the model"""
#     # Set into eval mode
#     model.eval()
#     likelihood.eval()

#     # Exact predictions
#     with torch.no_grad(): #, gpytorch.settings.fast_pred_var():
#         start_time = time.time()
#         preds = model(test_x) # no noise
#         covar = preds.covariance_matrix
#         cpu_exact_meancovar.append(time.time() - start_time)
    
    
    
#     # LOVE without cache
#         # Clear the cache from the previous computations
#     model.train()
#     likelihood.train()
#     # Set into eval mode
#     model.eval()
#     likelihood.eval()

#     with torch.no_grad(), gpytorch.settings.fast_pred_var():
#         start_time = time.time()
#         preds = model(test_x)
#         fast_covar = preds.covariance_matrix
#         cpu_love_meancovar.append(time.time() - start_time)
    
#     # LOVE with cache
#     with torch.no_grad(), gpytorch.settings.fast_pred_var():
#         start_time = time.time()
#         preds = model(test_x)
#         fast_covar = preds.covariance_matrix
#         cpu_love_meancovar_cache.append(time.time() - start_time)  


In [6]:
print(cpu_training_time)
print(cpu_exact_meancovar)
print(cpu_love_meancovar)
print(cpu_love_meancovar_cache)

[]
[]
[]
[]


In [7]:
# # Initialize plots
# f, (y1_ax, y2_ax) = plt.subplots(1, 2, figsize=(15, 10))

# # This contains predictions for both tasks, flattened out
# # The first half of the predictions is for the first task
# # The second half is for the second task

# # Plot training data as black stars
# y1_ax.plot(train_x[:, 0].detach().numpy(), train_y[:, 0].detach().numpy(), 'k*')
# # Predictive mean as blue line
# y1_ax.plot(test_x[:, 0].numpy(), preds.mean[:, 0].numpy(), 'b')
# # Shade in confidence
# # y1_ax.fill_between(test_x[:, 0].numpy(), lower[:, 0].numpy(), upper[:, 0].numpy(), alpha=0.5)
# # y1_ax.set_ylim([-3, 3])
# y1_ax.legend(['Observed Data', 'Mean', 'Confidence'])
# y1_ax.set_title('Observed Values (Likelihood)')

# # Plot training data as black stars
# y2_ax.plot(train_x[:, 1].detach().numpy(), train_y[:, 1].detach().numpy(), 'k*')
# # Predictive mean as blue line
# y2_ax.plot(test_x[:, 1].numpy(), preds.mean[:, 1].numpy(), 'b')
# # Shade in confidence
# # y2_ax.fill_between(test_x[:, 1].numpy(), lower[:, 1].numpy(), upper[:, 1].numpy(), alpha=0.5)
# # y2_ax.set_ylim([-3, 3])
# y2_ax.legend(['Observed Data', 'Mean', 'Confidence'])
# y2_ax.set_title('Observed Values (Likelihood)')

# None

In [8]:
gpu_training_time = []
gpu_exact_meancovar = []
gpu_love_meancovar = []
gpu_love_meancovar_cache = []
love_covar_error = []

In [9]:
gpu_size_vec = [100,300,500,700,1000,1500,2000,3000,4000]#,5000,6000,7000]
Nval = 4
Dval = 4

for size in gpu_size_vec:
    print(f"data size: {size}")
    """Set up the training and testing data"""
    n = size # input size

#     x = 5 * torch.rand(n, Dval)

#     y = torch.stack([
#         torch.sin(x[:, 0] * (2 * math.pi)) + torch.randn(n) * 0.02,
#         torch.cos(x[:, 0] * (2 * math.pi)) + torch.cos(x[:, 2] * (2 * math.pi)) + torch.randn(n) * 0.02,
#         torch.sin(x[:, 2] * (2 * math.pi)) + torch.cos(x[:, 1] * (2 * math.pi)) + torch.randn(n) * 0.02,
#         (torch.cos(x[:, 3] * (2 * math.pi)))* (torch.sin(x[:, 0] * (2 * math.pi))) + torch.randn(n) * 0.02,
#     ], -1)

    x = 5 * torch.rand(n)
    
    y = torch.stack([
        torch.sin(3 * x) + torch.randn(n) * 0.02,
        torch.cos(x) + torch.cos(2 * x) + torch.randn(n) * 0.02,
        torch.sin(x) + torch.cos(x) + torch.randn(n) * 0.02,
        torch.cos(x) * torch.cos(x) + torch.randn(n) * 0.02,
    ], -1)

#     train_x = torch.Tensor(x[:int(0.8*n), :])
#     train_y = y[:int(0.8*n), :]

#     test_x = torch.Tensor(x[int(0.8*n):, :])

#     test_y = torch.Tensor(y[int(0.8*n):, :])

    train_x = x[:int(0.8*n)]
    train_y = y[:int(0.8*n)]

    test_x = x[int(0.8*n): ]

    test_y = y[int(0.8*n): ]

#     # normalize features
#     mean = train_x.mean(dim=-2, keepdim=True)
#     std = train_x.std(dim=-2, keepdim=True) # + 1e-6 # prevent dividing by 0
#     train_x = (train_x - mean) / std
#     test_x = (test_x - mean) / std

#     # normalize labels
#     mean, std = train_y.mean(),train_y.std()
#     train_y = (train_y - mean) / std
#     test_y = (test_y - mean) / std

#     norm_vec = (vec - mean) / std
    
    
    likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=Dval)
    model = MultitaskGPModel(train_x, train_y, likelihood, num_base_kernels)
    
    start_time = time.time() # include the time of copying values onto gpu
    
    use_cuda = torch.cuda.is_available()
    print(f"Use Cuda: {use_cuda}")
    if(use_cuda):
        train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda()
        model, likelihood = model.cuda(), likelihood.cuda()
    
    """train the model hyperparameters"""
    import os
    smoke_test = ('CI' in os.environ)
    training_iterations = 2 if smoke_test else 100

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.09)  # Includes GaussianLikelihood parameters

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    for i in range(training_iterations):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
#           if(i > training_iterations*0.8):
        print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
        optimizer.step()
    
    gpu_training_time.append(time.time() - start_time)
    
    print()
    
    """ Making predictions with the model"""
    # Set into eval mode
    model.eval()
    likelihood.eval()

    # Exact predictions
    with torch.no_grad(): #, gpytorch.settings.fast_pred_var():
        start_time = time.time()
        preds = model(test_x) # no noise
        covar = preds.covariance_matrix
        gpu_exact_meancovar.append(time.time() - start_time)
    
    
    
    # LOVE without cache
        # Clear the cache from the previous computations
    model.train()
    likelihood.train()
    # Set into eval mode
    model.eval()
    likelihood.eval()

    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        start_time = time.time()
        preds = model(test_x)
        fast_covar = preds.covariance_matrix
        gpu_love_meancovar.append(time.time() - start_time)
        
        
#     """
#     Compute sum of squared difference LOVE diagonal covariance elements from exact diagonal elements 
#     (again divided by trace of exact covariance to make the quantity normalized), as a function
#     of vector size
#     """
    
#     exactdiag = torch.diagonal(covar)
#     lovediag = torch.diagonal(fast_covar)
    print(covar)
    print(fast_covar)
    
    exactdiag = torch.diagonal(covar)
    lovediag = torch.diagonal(fast_covar)
    diff = (exactdiag - lovediag).square().mean().sqrt()
    diff = diff / exactdiag.square().mean().sqrt()
    diff = diff.log()
    love_covar_error.append(diff)
    print(f"error: {diff}")
    
    # LOVE with cache
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        start_time = time.time()
        preds = model(test_x)
        fast_covar = preds.covariance_matrix
        gpu_love_meancovar_cache.append(time.time() - start_time)  


data size: 100
Use Cuda: True


torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  /lus/theta-fs0/software/thetagpu/conda/2022-07-01/pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:2183.)
  res = torch.triangular_solve(right_tensor, self.evaluate(), upper=self.upper).solution


Iter 1/100 - Loss: 1.286
Iter 2/100 - Loss: 1.242
Iter 3/100 - Loss: 1.199
Iter 4/100 - Loss: 1.157
Iter 5/100 - Loss: 1.116
Iter 6/100 - Loss: 1.076
Iter 7/100 - Loss: 1.038
Iter 8/100 - Loss: 0.999
Iter 9/100 - Loss: 0.960
Iter 10/100 - Loss: 0.920
Iter 11/100 - Loss: 0.881
Iter 12/100 - Loss: 0.841
Iter 13/100 - Loss: 0.801
Iter 14/100 - Loss: 0.760
Iter 15/100 - Loss: 0.720
Iter 16/100 - Loss: 0.681
Iter 17/100 - Loss: 0.641
Iter 18/100 - Loss: 0.602
Iter 19/100 - Loss: 0.563
Iter 20/100 - Loss: 0.523
Iter 21/100 - Loss: 0.482
Iter 22/100 - Loss: 0.441
Iter 23/100 - Loss: 0.399
Iter 24/100 - Loss: 0.357
Iter 25/100 - Loss: 0.314
Iter 26/100 - Loss: 0.272
Iter 27/100 - Loss: 0.230
Iter 28/100 - Loss: 0.188
Iter 29/100 - Loss: 0.146
Iter 30/100 - Loss: 0.104
Iter 31/100 - Loss: 0.061
Iter 32/100 - Loss: 0.018
Iter 33/100 - Loss: -0.025
Iter 34/100 - Loss: -0.068
Iter 35/100 - Loss: -0.111
Iter 36/100 - Loss: -0.154
Iter 37/100 - Loss: -0.197
Iter 38/100 - Loss: -0.240
Iter 39/100 - L

Iter 9/100 - Loss: 0.852
Iter 10/100 - Loss: 0.811
Iter 11/100 - Loss: 0.769
Iter 12/100 - Loss: 0.730
Iter 13/100 - Loss: 0.687
Iter 14/100 - Loss: 0.650
Iter 15/100 - Loss: 0.619
Iter 16/100 - Loss: 0.570
Iter 17/100 - Loss: 0.541
Iter 18/100 - Loss: 0.494
Iter 19/100 - Loss: 0.448
Iter 20/100 - Loss: 0.411
Iter 21/100 - Loss: 0.364
Iter 22/100 - Loss: 0.332
Iter 23/100 - Loss: 0.280
Iter 24/100 - Loss: 0.240
Iter 25/100 - Loss: 0.190
Iter 26/100 - Loss: 0.150
Iter 27/100 - Loss: 0.102
Iter 28/100 - Loss: 0.062
Iter 29/100 - Loss: 0.012
Iter 30/100 - Loss: -0.033
Iter 31/100 - Loss: -0.075
Iter 32/100 - Loss: -0.127
Iter 33/100 - Loss: -0.170
Iter 34/100 - Loss: -0.217
Iter 35/100 - Loss: -0.261
Iter 36/100 - Loss: -0.306
Iter 37/100 - Loss: -0.347
Iter 38/100 - Loss: -0.394
Iter 39/100 - Loss: -0.439
Iter 40/100 - Loss: -0.488
Iter 41/100 - Loss: -0.532
Iter 42/100 - Loss: -0.578
Iter 43/100 - Loss: -0.628
Iter 44/100 - Loss: -0.680
Iter 45/100 - Loss: -0.711
Iter 46/100 - Loss: -0.

Iter 29/100 - Loss: 0.065
Iter 30/100 - Loss: 0.018
Iter 31/100 - Loss: -0.023
Iter 32/100 - Loss: -0.071
Iter 33/100 - Loss: -0.118
Iter 34/100 - Loss: -0.162
Iter 35/100 - Loss: -0.206
Iter 36/100 - Loss: -0.251
Iter 37/100 - Loss: -0.298
Iter 38/100 - Loss: -0.344
Iter 39/100 - Loss: -0.390
Iter 40/100 - Loss: -0.435
Iter 41/100 - Loss: -0.483
Iter 42/100 - Loss: -0.527
Iter 43/100 - Loss: -0.572
Iter 44/100 - Loss: -0.619
Iter 45/100 - Loss: -0.665
Iter 46/100 - Loss: -0.711
Iter 47/100 - Loss: -0.757
Iter 48/100 - Loss: -0.800
Iter 49/100 - Loss: -0.848
Iter 50/100 - Loss: -0.894
Iter 51/100 - Loss: -0.939
Iter 52/100 - Loss: -0.986
Iter 53/100 - Loss: -1.031
Iter 54/100 - Loss: -1.073
Iter 55/100 - Loss: -1.120
Iter 56/100 - Loss: -1.166
Iter 57/100 - Loss: -1.210
Iter 58/100 - Loss: -1.253
Iter 59/100 - Loss: -1.294
Iter 60/100 - Loss: -1.337
Iter 61/100 - Loss: -1.384
Iter 62/100 - Loss: -1.428
Iter 63/100 - Loss: -1.468
Iter 64/100 - Loss: -1.509
Iter 65/100 - Loss: -1.551
Ite



Iter 99/100 - Loss: -2.385
Iter 100/100 - Loss: -2.588





tensor([[11.0336, -4.8401, -1.4165,  ..., -3.2789, -0.8091, -1.2897],
        [-7.8555, 17.0430,  1.8291,  ..., 10.0891,  1.4181,  4.6633],
        [-0.9008,  0.5843,  0.5845,  ...,  0.6769,  0.4557, -0.5959],
        ...,
        [-3.3321,  9.7346,  0.8020,  ...,  9.2769,  0.5736,  4.1698],
        [-0.7226,  0.7263,  0.2673,  ...,  0.4668,  0.2777, -0.7035],
        [-1.0832,  4.4287,  0.2176,  ...,  4.6524,  0.0996,  2.6682]],
       device='cuda:0')
tensor([[-1.3212e-01,  2.5601e-01,  3.2304e-03,  ...,  1.7650e-01,
          7.1210e-03,  8.6710e-02],
        [ 2.5601e-01, -5.2408e-01, -4.2719e-03,  ..., -3.6851e-01,
         -1.2603e-02, -1.8283e-01],
        [ 3.2304e-03, -4.2719e-03, -6.6185e-04,  ..., -5.8776e-04,
         -6.0582e-04,  1.3554e-04],
        ...,
        [ 1.7650e-01, -3.6851e-01, -5.8770e-04,  ..., -2.7048e-01,
         -6.8333e-03, -1.3598e-01],
        [ 7.1209e-03, -1.2603e-02, -6.0582e-04,  ..., -6.8333e-03,
         -7.2479e-04, -3.0262e-03],
        [ 8.67



Iter 82/100 - Loss: -2.149
Iter 83/100 - Loss: -2.296
Iter 84/100 - Loss: -2.327
Iter 85/100 - Loss: -2.357
Iter 86/100 - Loss: -2.388
Iter 87/100 - Loss: -2.266




Iter 88/100 - Loss: -2.279




Iter 89/100 - Loss: -2.280
Iter 90/100 - Loss: -2.519




Iter 91/100 - Loss: -2.310




Iter 92/100 - Loss: -2.324




Iter 93/100 - Loss: -2.339




Iter 94/100 - Loss: -2.354
Iter 95/100 - Loss: -2.353




Iter 96/100 - Loss: -2.360
Iter 97/100 - Loss: -2.349




Iter 98/100 - Loss: -2.369




Iter 99/100 - Loss: -2.358




Iter 100/100 - Loss: -2.348





tensor([[31.6215, -3.2924,  3.9037,  ..., -3.1591,  3.6145,  4.7987],
        [-3.2804, 16.0375,  4.5311,  ..., 15.6179,  4.5823, -8.6053],
        [ 3.9010,  4.5575,  8.0438,  ...,  4.5570,  7.7015, -3.2313],
        ...,
        [-3.1429, 15.6235,  4.5327,  ..., 16.0000,  4.6017, -8.7859],
        [ 3.5998,  4.5549,  7.6979,  ...,  4.5796,  7.7269, -3.1924],
        [ 4.7789, -8.6051, -3.2201,  ..., -8.7858, -3.2000,  5.4692]],
       device='cuda:0')
tensor([[-0.0118,  0.0020, -0.0029,  ...,  0.0020, -0.0027, -0.0019],
        [ 0.0020, -0.0006,  0.0004,  ..., -0.0006,  0.0004,  0.0005],
        [-0.0029,  0.0004, -0.0007,  ...,  0.0004, -0.0007, -0.0004],
        ...,
        [ 0.0020, -0.0006,  0.0004,  ..., -0.0006,  0.0004,  0.0005],
        [-0.0027,  0.0004, -0.0007,  ...,  0.0004, -0.0006, -0.0004],
        [-0.0019,  0.0005, -0.0004,  ...,  0.0005, -0.0004, -0.0004]],
       device='cuda:0')
tensor(16.0409, device='cuda:0')
error: 0.0002466136065777391
data size: 2000
Use Cu



Iter 84/100 - Loss: -2.207
Iter 85/100 - Loss: -2.353
Iter 86/100 - Loss: -2.250
Iter 87/100 - Loss: -2.445
Iter 88/100 - Loss: -2.469
Iter 89/100 - Loss: -2.292
Iter 90/100 - Loss: -2.310




Iter 91/100 - Loss: -2.326
Iter 92/100 - Loss: -2.349




Iter 93/100 - Loss: -2.349
Iter 94/100 - Loss: -2.357




Iter 95/100 - Loss: -2.363




Iter 96/100 - Loss: -2.372
Iter 97/100 - Loss: -2.378




Iter 98/100 - Loss: -2.378
Iter 99/100 - Loss: -2.386




Iter 100/100 - Loss: -2.385





tensor([[29.4083, -5.7319, -4.1715,  ..., -5.7051, -5.0488,  6.2976],
        [-4.1798,  9.5809, -0.5638,  ...,  9.5203, -0.4189, -3.9827],
        [-2.7312, -0.5183,  3.1773,  ..., -0.5194,  3.4376, -1.1350],
        ...,
        [-4.1544,  9.5399, -0.5662,  ...,  9.4853, -0.4211, -3.9812],
        [-2.7183, -0.5168,  3.1604,  ..., -0.5175,  3.4200, -1.1324],
        [ 4.5090, -3.9603, -0.8336,  ..., -3.9490, -1.1251,  5.3965]],
       device='cuda:0')
tensor([[-0.0226,  0.0029,  0.0026,  ...,  0.0029,  0.0026, -0.0038],
        [ 0.0029, -0.0006, -0.0003,  ..., -0.0006, -0.0003,  0.0006],
        [ 0.0026, -0.0003, -0.0003,  ..., -0.0003, -0.0003,  0.0004],
        ...,
        [ 0.0029, -0.0006, -0.0003,  ..., -0.0006, -0.0003,  0.0006],
        [ 0.0026, -0.0003, -0.0003,  ..., -0.0003, -0.0003,  0.0004],
        [-0.0038,  0.0006,  0.0004,  ...,  0.0006,  0.0004, -0.0007]],
       device='cuda:0')
tensor(17.1098, device='cuda:0')
error: 0.0009766814764589071
data size: 3000
Use Cu



Iter 83/100 - Loss: -2.188
Iter 84/100 - Loss: -2.367
Iter 85/100 - Loss: -2.232
Iter 86/100 - Loss: -2.262
Iter 87/100 - Loss: -2.393
Iter 88/100 - Loss: -2.288




Iter 89/100 - Loss: -2.301




Iter 90/100 - Loss: -2.301




Iter 91/100 - Loss: -2.307
Iter 92/100 - Loss: -2.313
Iter 93/100 - Loss: -2.317
Iter 94/100 - Loss: -2.296




Iter 95/100 - Loss: -2.322




Iter 96/100 - Loss: -2.308




Iter 97/100 - Loss: -2.261




Iter 98/100 - Loss: -2.244
Iter 99/100 - Loss: -2.225




Iter 100/100 - Loss: -2.206





tensor([[11.7863, -3.0480, -0.9126,  ..., -2.2769, -1.3280, -0.4421],
        [-2.3319,  4.4326, -0.8606,  ...,  1.9834,  0.0363, -0.0747],
        [-0.7662, -0.5556,  1.8221,  ..., -0.1018,  0.7348,  0.0955],
        ...,
        [-2.2113,  2.3547, -0.0619,  ...,  5.0057,  0.2943, -0.3298],
        [-0.9477, -0.1553,  0.5283,  ..., -0.9800,  1.4489, -0.1087],
        [-0.5221,  0.0609,  0.1157,  ..., -0.2621,  0.0289,  0.1379]],
       device='cuda:0')
tensor([[-1.7385e-03,  6.4635e-04, -4.9800e-05,  ...,  5.2953e-04,
         -1.0943e-04,  1.4451e-04],
        [ 6.4635e-04, -2.3985e-04,  1.9193e-05,  ..., -1.9681e-04,
          4.1410e-05, -5.3532e-05],
        [-4.9800e-05,  1.9193e-05,  7.1526e-07,  ...,  1.6555e-05,
         -5.6624e-06,  4.4517e-06],
        ...,
        [ 5.2953e-04, -1.9681e-04,  1.6555e-05,  ..., -1.6069e-04,
          3.4571e-05, -4.3690e-05],
        [-1.0943e-04,  4.1410e-05, -5.6624e-06,  ...,  3.4571e-05,
         -7.1526e-06,  9.5516e-06],
        [ 1.44



Iter 78/100 - Loss: -2.069
Iter 79/100 - Loss: -2.187
Iter 80/100 - Loss: -2.222
Iter 81/100 - Loss: -2.213
Iter 82/100 - Loss: -2.151




Iter 83/100 - Loss: -2.162




Iter 84/100 - Loss: -2.182




Iter 85/100 - Loss: -2.157




Iter 86/100 - Loss: -2.128




Iter 87/100 - Loss: -2.109




Iter 88/100 - Loss: -2.102




Iter 89/100 - Loss: -2.077




Iter 90/100 - Loss: -2.125




Iter 91/100 - Loss: -2.145




Iter 92/100 - Loss: -2.186




Iter 93/100 - Loss: -2.180
Iter 94/100 - Loss: -2.193




Iter 95/100 - Loss: -2.179




Iter 96/100 - Loss: -2.241




Iter 97/100 - Loss: -2.210




Iter 98/100 - Loss: -2.232




Iter 99/100 - Loss: -2.225




Iter 100/100 - Loss: -2.242





tensor([[ 9.3188e+00, -6.7244e+00,  7.6170e+00,  ..., -5.5533e+00,
          3.2573e+00, -1.5176e-01],
        [-3.9314e+00,  9.4657e+00, -3.3255e+00,  ...,  2.4589e+00,
         -1.2606e+00, -3.5882e-02],
        [ 6.3331e+00, -4.1173e+00,  8.4479e+00,  ..., -3.3537e+00,
          2.3230e+00,  6.9898e-03],
        ...,
        [-5.3531e-01,  1.3989e+00, -2.3749e-01,  ...,  6.5765e+00,
         -3.5652e+00,  7.3150e-01],
        [ 1.8400e+00, -3.8288e-01,  6.0291e-01,  ..., -4.1123e+00,
          8.7446e+00, -4.2370e-01],
        [ 2.9253e-01, -4.3768e-02,  2.0607e-01,  ...,  6.0666e-01,
         -2.0390e-01,  9.2895e-01]], device='cuda:0')
tensor([[-0.0103,  0.0055, -0.0049,  ..., -0.0071,  0.0147, -0.0034],
        [ 0.0055, -0.0031,  0.0026,  ...,  0.0053, -0.0101,  0.0024],
        [-0.0049,  0.0026, -0.0023,  ..., -0.0030,  0.0064, -0.0015],
        ...,
        [-0.0071,  0.0053, -0.0030,  ..., -0.0647,  0.0961, -0.0246],
        [ 0.0147, -0.0101,  0.0064,  ...,  0.0961, -0.1446

In [10]:
print(gpu_training_time)
print(gpu_exact_meancovar)
print(gpu_love_meancovar)
print(gpu_love_meancovar_cache)

[4.867035627365112, 9.235561609268188, 10.500993728637695, 9.693816423416138, 11.418615579605103, 24.82183837890625, 23.0015971660614, 26.419338703155518, 40.362865686416626]
[0.38283562660217285, 0.23735904693603516, 0.37119531631469727, 0.8079555034637451, 4.700577974319458, 11.02782917022705, 21.38694477081299, 61.8224663734436, 134.51374578475952]
[0.03187155723571777, 0.33365297317504883, 0.4834730625152588, 0.4864842891693115, 1.4249203205108643, 1.626413106918335, 1.466770887374878, 1.5310571193695068, 1.8382620811462402]
[0.015583276748657227, 0.03659701347351074, 0.03690624237060547, 0.036966562271118164, 0.03968024253845215, 0.04920148849487305, 0.06383752822875977, 0.12085819244384766, 0.2277364730834961]


In [11]:
# # plot with various axes scales
# plt.figure()

# CPU vs GPU training
plt.figure(figsize=(5,5))
plt.plot(cpu_size_vec, cpu_training_time, 'r-', label='cpu training time')
plt.plot(gpu_size_vec, gpu_training_time, 'b-', label='gpu training time')
plt.ylabel('Time (second)')
plt.xlabel('Input Size')
plt.title('CPU vs. GPU Training Time')
# plt.grid(True)



plt.legend()
plt.show()

NameError: name 'cpu_size_vec' is not defined

<Figure size 360x360 with 0 Axes>

In [None]:
# Predictive Distribution Computation Time

# # Initialize plots
f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(14, 6), sharey=True)

plt.suptitle('Predictive Distribution Computation Time', fontweight='bold')

ax1.plot(cpu_size_vec, cpu_exact_meancovar, 'r-', label='cpu exact')
ax1.plot(gpu_size_vec, gpu_exact_meancovar, 'b-', label='gpu exact')
ax1.legend()


plt.subplot(132)
ax2.plot(cpu_size_vec, cpu_love_meancovar, 'g-', label='cpu love no cache')
ax2.plot(gpu_size_vec, gpu_love_meancovar, 'c-', label='gpu love no cache')
ax2.legend()
# plt.ylabel('Time')
# plt.xlabel('Input Size')

plt.subplot(133)
ax3.plot(cpu_size_vec, cpu_love_meancovar_cache, 'y-', label='cpu love with cache')
ax3.plot(gpu_size_vec, gpu_love_meancovar_cache, 'm-', label='gpu love with cache')
ax3.legend()

plt.setp([ax1,ax2,ax3], xlabel='Input Size')
plt.setp(ax1, ylabel='Time (Second)')

plt.legend()
plt.tight_layout()