# 

# Important Note
RAM and VRAM measurements are dependent on the computer state, and should only be interpreted relative to each other. In order to obtain RAM and VRAM measurements, perform the following steps:

1 - Restart the Kernel

2 - Run the "Loading Required Packages and Helper Functions" cell

3 - Run the "Loading Data" cell

4 - Run ONLY ONE iteration of the desired method, and read the RAM and VRAM usage reports printed by the cell

# Loading Required Packages and Helper Functions
If you would like to use Cuda, set gpu = True. Otherwise set gpu = False. 

Step 1: Run the following cell to import the required packages and helper functions. Set the number of replicates desired.

Step 2: Load the Data

Step 3: Execute the cells under the method you wish to replicate.

# Step 1

In [1]:
gpu = True
n_replicates = 11 

In [2]:
# --- Core Imports ---
import os
import gc
import time
import math
import urllib.request
import statistics
import numpy as np
import pandas as pd
import torch
import gpytorch
import psutil
import tqdm
import faiss
from math import floor
from matplotlib import pyplot as plt
from scipy.io import loadmat
from torch.utils.data import DataLoader, TensorDataset
from memory_profiler import memory_usage
import pynvml

# --- GPyTorch Imports ---
from gpytorch.models import ApproximateGP
from gpytorch.variational.nearest_neighbor_variational_strategy import NNVariationalStrategy
from gpytorch.models.deep_gps import DeepGPLayer, DeepGP
from gpytorch.mlls import DeepApproximateMLL
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel, InducingPointKernel
from gpytorch.distributions import MultivariateNormal

# --- Memory and GPU Utilities ---
def clear_gpu():
    for obj in ['model', 'likelihood', 'observed_pred', 'preds', 'output']:
        if obj in globals():
            del globals()[obj]
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    torch.cuda.reset_peak_memory_stats()

def get_mem():
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / (1024 ** 2)  # Return MB

max_vram = 0
def vram_usage():
    global max_vram
    max_vram = max(max_vram, torch.cuda.memory_allocated())

def log_memory():
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
    meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
    max_allocated = torch.cuda.max_memory_allocated() / 1024**2  # MB
    max_reserved = torch.cuda.max_memory_reserved() / 1024**2    # MB
    gpu_used = meminfo.used / 1024**2                            # MB
    sys_used = psutil.virtual_memory().used / 1024**3            # GB
    print(f"[PyTorch] Max Allocated: {max_allocated:.2f} MB | Max Reserved: {max_reserved:.2f} MB")
    print(f"[GPU VRAM] Used (nvidia-smi): {gpu_used:.2f} MB | [System RAM]: {sys_used:.2f} GB")
    return max_allocated, max_reserved, gpu_used, sys_used

# --- IPython Magics (should only be run in notebooks) ---
# These will throw errors outside of Jupyter; include only if running interactively
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
    get_ipython().run_line_magic('load_ext', 'autoreload')
    get_ipython().run_line_magic('autoreload', '2')
except:
    pass


# Loading Data
Step 2: Load the data (note: must run the DataGenerator.Rmd file first)

In [None]:
csvfile = pd.read_csv('Data/data_2d.csv', header = None, dtype=float, delimiter=',')
all_data = torch.tensor(np.array(csvfile)).float()

def splitter(all_data, n_train=80_000, n_test=20_000, random_state=42, move_to_gpu=True):
    assert all_data.ndim == 2 and all_data.shape[1] == 3, \
        "all_data must be [N,3]"
    total_samples = all_data.shape[0]
    assert n_train + n_test <= total_samples, "Not enough samples to split"
    rng = np.random.default_rng(seed=random_state)
    indices = rng.permutation(total_samples)
    train_idx = indices[:n_train]
    test_idx  = indices[n_train:n_train + n_test]
    train = all_data[train_idx]
    test  = all_data[test_idx]

    train_x = train[:, :2].contiguous()
    train_y = train[:,  2].contiguous()
    test_x  = test[:,  :2].contiguous()
    test_y  = test[:,   2].contiguous()

    if move_to_gpu and torch.cuda.is_available():
        train_x = train_x.cuda()
        train_y = train_y.cuda()
        test_x  = test_x.cuda()
        test_y  = test_y.cuda()

    return train_x, train_y, test_x, test_y

train_x, train_y, test_x, test_y = splitter(all_data, n_train= 80_000, n_test = 20_000, random_state=42, move_to_gpu=True)
print(train_x.shape)
print(train_x.size(-1))
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)


# Simulations
Step 3: Execute the simulations to be reproduced. If all simulations are run, there is a summarizer at the end. Otherwise, the relevant statistics are printed at the end of each method.

# Deep Kernel

In [None]:
import time
import torch
import gpytorch
import tqdm
import numpy as np
import statistics
from torch.utils.data import TensorDataset, DataLoader


BATCH_SIZE = 32
TRAIN_SIZE, TEST_SIZE = 400_000, 20_000
N_REPLICATES = n_replicates  # defined elsewhere
TRAIN_ITERS = 60
USE_GPU = torch.cuda.is_available()


def make_loader(x, y, shuffle=False):
    return DataLoader(TensorDataset(x, y), batch_size=BATCH_SIZE, shuffle=shuffle)

# MLP feature extractor
class FeatureNet(torch.nn.Sequential):
    def __init__(self, in_dim):
        super().__init__(
            torch.nn.Linear(in_dim, 1000), torch.nn.ReLU(),
            torch.nn.Linear(1000, 500),   torch.nn.ReLU(),
            torch.nn.Linear(500, 50),     torch.nn.ReLU(),
            torch.nn.Linear(50, 1)
        )


class GPModel(gpytorch.models.ExactGP):
    def __init__(self, x, y, likelihood, feat_net):
        super().__init__(x, y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5)),
            num_dims=1, grid_size=100
        )
        self.feat_net = feat_net
        self.scale = gpytorch.utils.grid.ScaleToBounds(-1.0, 1.0)

    def forward(self, x):
        proj = self.scale(self.feat_net(x))
        vram_usage()
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(proj),
            self.covar_module(proj)
        )


mse_l_dkl, time_l_dkl = [], []

for i in range(N_REPLICATES):
    tx, ty, vx, vy = splitter(
        all_data, n_train=TRAIN_SIZE, n_test=TEST_SIZE,
        random_state=42 + i, move_to_gpu=gpu
    )
    if USE_GPU:
        tx, ty, vx, vy = tx.cuda(), ty.cuda(), vx.cuda(), vy.cuda()

    mem_begin = get_mem()


    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPModel(tx, ty, likelihood, FeatureNet(tx.size(-1)))
    if USE_GPU:
        model, likelihood = model.cuda(), likelihood.cuda()

    optimizer = torch.optim.Adam(
        list(model.feat_net.parameters()) +
        list(model.covar_module.parameters()) +
        list(model.mean_module.parameters()) +
        list(likelihood.parameters()),
        lr=0.02
    )
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)


    model.train(); likelihood.train()
    iterator = tqdm.tqdm(range(TRAIN_ITERS), leave=True)
    start = time.time()
    for _ in iterator:
        optimizer.zero_grad()
        output = model(tx)
        loss = -mll(output, ty)
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        vram_usage()
        optimizer.step()
    uTime = time.time() - start
    print(uTime)


    mem_diff = get_mem() - mem_begin
    print("Memory Usage:", mem_diff, "MB")


    model.eval(); likelihood.eval()
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        preds = likelihood(model(vx))
    peak_alloc, peak_reserved, gpu_used, sys_used = log_memory()

    means = preds.mean.cpu()
    MSE = torch.mean((means - vy.cpu())**2)
    mse_l_dkl.append(MSE.item())
    time_l_dkl.append(uTime)

    print(
        f"DKL: Rep {i+1}: "
        f"MSE={MSE:.4f}, "
        f"Time={uTime:.2f}s, "
        f"RAM Δ={mem_diff:.2f}MB, "
        f"VRAM peak={peak_alloc:.2f}MB"
    )


print(
    round(statistics.mean(mse_l_dkl), 5),
    round(statistics.stdev(mse_l_dkl), 5),
    round(statistics.mean(time_l_dkl), 5),
    round(statistics.stdev(time_l_dkl), 5)
)
clear_gpu()


  if nonzero_indices.storage():
  res = cls(index_tensor, value_tensor, interp_size)
  res = cls(index_tensor, value_tensor, interp_size)
  7%|▋         | 4/60 [00:03<00:51,  1.08it/s, loss=0.803]


KeyboardInterrupt: 

# Sparse GP

In [None]:
import time
import statistics
import numpy as np
import torch
import gpytorch
import tqdm
from torch.utils.data import TensorDataset, DataLoader

class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        base = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5, ard_num_dims=2))
        ip = train_x[::1500].clone()
        self.covar_module = gpytorch.kernels.InducingPointKernel(base, inducing_points=ip, likelihood=likelihood)

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))

my_batch_size = 320
smoke_test = False

train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=my_batch_size, shuffle=True)
test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=my_batch_size, shuffle=False)

BATCH_SIZE = 32
TRAIN_SIZE, TEST_SIZE = 400_000, 20_000
N_REPLICATES = n_replicates
TRAIN_ITERS = 60
USE_GPU = torch.cuda.is_available()

mse_l_sgpr = []
time_l_sgpr = []

for i in range(N_REPLICATES):
    train_x, train_y, test_x, test_y = splitter(
        all_data, n_train=TRAIN_SIZE, n_test=TEST_SIZE,
        random_state=42 + i, move_to_gpu=gpu
    )
    test_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=my_batch_size, shuffle=False)
    mem_begin = get_mem()

    likelihood = gpytorch.likelihoods.GaussianLikelihood().double()
    model = GPRegressionModel(train_x, train_y, likelihood).double()
    if gpu:
        model, likelihood = model.cuda(), likelihood.cuda()

    training_iterations = 350
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    def train():
        for _ in tqdm.tqdm(range(training_iterations), desc="Train"):
            optimizer.zero_grad()
            loss = -mll(model(train_x), train_y)
            loss.backward()
            optimizer.step()
            vram_usage()
            torch.cuda.empty_cache()

    begin = time.time()
    train()
    uTime = time.time() - begin
    print("Time:", uTime)

    model.eval()
    likelihood.eval()
    means = torch.tensor([0.])
    with torch.no_grad():
        for x_batch, _ in test_loader:
            preds = model(x_batch)
            means = torch.cat([means, preds.mean.cpu()])
    means = means[1:]

    mem_diff = get_mem() - mem_begin
    peak_alloc, peak_reserved, gpu_used, sys_used = log_memory()

    MSE = torch.mean((means - test_y.cpu()) ** 2)
    mse_l_sgpr.append(MSE.item())
    time_l_sgpr.append(uTime)

    print(
        f"SGPR: Rep {i+1}: MSE={MSE:.4f}, Time={uTime:.2f}s, "
        f"RAM Δ={mem_diff:.2f}MB, VRAM peak={peak_alloc:.2f}MB"
    )
    clear_gpu()

print(statistics.mean(mse_l_sgpr), statistics.stdev(mse_l_sgpr))
print(statistics.mean(time_l_sgpr), statistics.stdev(time_l_sgpr))
print(
    round(statistics.mean(mse_l_sgpr), 5),
    round(statistics.stdev(mse_l_sgpr), 5),
    round(statistics.mean(time_l_sgpr), 5),
    round(statistics.stdev(time_l_sgpr), 5)
)


# LOVE

In [None]:
my_batch_size = 3200
smoke_test = False
train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=my_batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=320, shuffle=False)

class LargeFeatureExtractor(torch.nn.Sequential):
    def __init__(self, input_dim):
        super().__init__()
        self.add_module('linear1', torch.nn.Linear(input_dim, 1000))
        self.add_module('relu1', torch.nn.ReLU())
        self.add_module('linear2', torch.nn.Linear(1000, 500))
        self.add_module('relu2', torch.nn.ReLU())
        self.add_module('linear3', torch.nn.Linear(500, 50))
        self.add_module('relu3', torch.nn.ReLU())
        self.add_module('linear4', torch.nn.Linear(50, 1))
        print("VRAM Usage:", torch.cuda.memory_allocated() / (1024**2), "MB")

class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5, ard_num_dims=1)),
            grid_size=100, num_dims=1
        )
        self.feature_extractor = LargeFeatureExtractor(input_dim=train_x.size(-1))

    def forward(self, x):
        projected_x = self.feature_extractor(x)
        projected_x = projected_x - projected_x.min(0)[0]
        projected_x = 2 * (projected_x / projected_x.max(0)[0]) - 1
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        vram_usage()
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

mse_l_love = []
time_l_love = []
BATCH_SIZE = 32
TRAIN_SIZE, TEST_SIZE = 400_000, 20_000

for i in range(n_replicates):
    train_x, train_y, test_x, test_y = splitter(
        all_data, n_train=TRAIN_SIZE, n_test=TEST_SIZE,
        random_state=42 + i, move_to_gpu=gpu
    )
    test_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=320, shuffle=False)
    mem_begin = get_mem()
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPRegressionModel(train_x, train_y, likelihood)
    if gpu:
        model, likelihood = model.cuda(), likelihood.cuda()
    training_iterations = 40
    model.train(); likelihood.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    def train():
        for _ in tqdm.tqdm(range(training_iterations)):
            optimizer.zero_grad()
            loss = -mll(model(train_x), train_y)
            loss.backward()
            vram_usage()
            optimizer.step()

    begin = time.time(); train(); uTime = time.time() - begin
    print("Time:", uTime)
    model.eval(); likelihood.eval()
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        observed_pred = likelihood(model(test_x.to('cuda') if gpu else test_x))
    mem_diff = get_mem() - mem_begin
    peak_alloc, peak_reserved, gpu_used, sys_used = log_memory()
    means = observed_pred.mean.cpu()
    MSE = torch.mean((means - test_y.cpu()) ** 2)
    mse_l_love.append(MSE.item()); time_l_love.append(uTime)
    print(
        f"LOVE: Rep {i+1}: MSE={MSE:.4f}, Time={uTime:.2f}s, "
        f"RAM Δ={mem_diff:.2f}MB, VRAM peak={peak_alloc:.2f}MB"
    )
    clear_gpu()

print(statistics.mean(mse_l_love), statistics.stdev(mse_l_love))
print(statistics.mean(time_l_love), statistics.stdev(time_l_love))
print(
    round(statistics.mean(mse_l_love), 5),
    round(statistics.stdev(mse_l_love), 5),
    round(statistics.mean(time_l_love), 5),
    round(statistics.stdev(time_l_love), 5)
)


# NGD

In [None]:
my_batch_size = 320

from torch.utils.data import TensorDataset, DataLoader

train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=my_batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=my_batch_size, shuffle=False)

class GPModel(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points):
        variational_distribution = gpytorch.variational.NaturalVariationalDistribution(inducing_points.size(0))
        variational_strategy = gpytorch.variational.VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=False
        )
        super(GPModel, self).__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        vram_usage()
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

mse_l_ngd = []
time_l_ngd = []

BATCH_SIZE = 32
TRAIN_SIZE, TEST_SIZE = 400_000, 20_000

for i in range(n_replicates):
    train_x, train_y, test_x, test_y = splitter(
        all_data, n_train=TRAIN_SIZE, n_test=TEST_SIZE,
        random_state=42 + i, move_to_gpu=gpu
    )
    train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=my_batch_size, shuffle=True)
    test_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=my_batch_size, shuffle=False)
    mem_begin = get_mem()

    inducing_points = train_x[::400]
    model = GPModel(inducing_points=inducing_points)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    if gpu:
        model, likelihood = model.cuda(), likelihood.cuda()

    variational_ngd_optimizer = gpytorch.optim.NGD(
        model.variational_parameters(), num_data=train_y.size(0), lr=0.01
    )
    hyperparameter_optimizer = torch.optim.Adam([
        {'params': model.hyperparameters()},
        {'params': likelihood.parameters()},
    ], lr=0.1)

    print("VRAM Usage:", torch.cuda.memory_allocated() / (1024**2), "MB")

    model.train(); likelihood.train()
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

    print("VRAM Usage:", torch.cuda.memory_allocated() / (1024**2), "MB")

    num_epochs = 1
    begin = time.time()

    for _ in tqdm.tqdm(range(num_epochs), desc="Epoch"):
        minibatch = tqdm.tqdm(train_loader, desc="Minibatch", leave=False)
        for x_batch, y_batch in minibatch:
            variational_ngd_optimizer.zero_grad(); hyperparameter_optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            minibatch.set_postfix(loss=loss.item())
            loss.backward()
            variational_ngd_optimizer.step(); hyperparameter_optimizer.step()

    uTime = time.time() - begin
    print("Time:", uTime)

    mem_diff = get_mem() - mem_begin
    print("Memory Usage:", mem_diff / (1024 ** 2), "MB")

    model.eval(); likelihood.eval()
    means = torch.tensor([0.])
    with torch.no_grad():
        for x_batch, _ in test_loader:
            preds = model(x_batch)
            means = torch.cat([means, preds.mean.cpu()])
    means = means[1:]

    mem_diff = get_mem() - mem_begin
    peak_alloc, peak_reserved, gpu_used, sys_used = log_memory()

    MSE = torch.mean((means - test_y.cpu()) ** 2)
    mse_l_ngd.append(MSE.item()); time_l_ngd.append(uTime)

    print(
        f"LOVE: Rep {i+1}: MSE={MSE:.4f}, Time={uTime:.2f}s, "
        f"RAM Δ={mem_diff:.2f}MB, VRAM peak={peak_alloc:.2f}MB"
    )
    clear_gpu()

print(statistics.mean(mse_l_ngd), statistics.stdev(mse_l_ngd))
print(statistics.mean(time_l_ngd), statistics.stdev(time_l_ngd))
print(
    round(statistics.mean(mse_l_ngd), 5),
    round(statistics.stdev(mse_l_ngd), 5),
    round(statistics.mean(time_l_ngd), 5),
    round(statistics.stdev(time_l_ngd), 5)
)


VRAM Usage: 8.6396484375 MB
VRAM Usage: 8.6396484375 MB


Epoch:   0%|          | 0/1 [00:15<?, ?it/s]                             


KeyboardInterrupt: 

# SVGP_CI

In [None]:
my_batch_size = 3200
smoke_test = False

from torch.utils.data import TensorDataset, DataLoader
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=my_batch_size, shuffle=True)
test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=my_batch_size, shuffle=False)
inducing_points = train_x[::5000]

class GPModel(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points):
        variational_distribution = gpytorch.variational.NaturalVariationalDistribution(inducing_points.size(0))
        variational_strategy = gpytorch.variational.CiqVariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
        super(GPModel, self).__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.MaternKernel(nu=1.5, ard_num_dims=2)
        )
        self.covar_module.base_kernel.initialize(lengthscale=0.01)  # Specific to the 3droad dataset
        print("VRAM Usage:", torch.cuda.memory_allocated()/(1024**2) , "MB")

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        vram_usage()
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


mse_l_svgpci = []
time_l_svgpci = []

BATCH_SIZE = 32
my_batch_size = 3200
TRAIN_SIZE, TEST_SIZE = 400_000, 20_000
for i in np.arange(0,n_replicates):
    train_x, train_y, test_x, test_y = splitter(
        all_data, n_train=TRAIN_SIZE, n_test=TEST_SIZE,
        random_state=42 + i, move_to_gpu=gpu
    )
    train_dataset = TensorDataset(train_x, train_y)#batch_size=1024
    train_loader = DataLoader(train_dataset, batch_size=my_batch_size, shuffle=True)
    test_dataset = TensorDataset(test_x, test_y)
    test_loader = DataLoader(test_dataset, batch_size=320, shuffle=False)
    mem_begin = get_mem()
    
    model = GPModel(inducing_points=inducing_points)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    
    if gpu:
        model = model.cuda()
        likelihood = likelihood.cuda()
    
    variational_ngd_optimizer = gpytorch.optim.NGD(model.variational_parameters(), num_data=train_y.size(0), lr=0.1)
    
    hyperparameter_optimizer = torch.optim.Adam([
        {'params': model.hyperparameters()},
        {'params': likelihood.parameters()},
    ], lr=0.002) #0.01 for 100k, 0.002 for Dense
    
    model.train()
    likelihood.train()
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))
    
    num_epochs = 4#10
    
    begin = time.time()
    epochs_iter = tqdm.tqdm(range(num_epochs), desc="Epoch")
    for i in epochs_iter:
        minibatch_iter = tqdm.tqdm(train_loader, desc="Minibatch", leave=False, position = 0)
    
        for x_batch, y_batch in minibatch_iter:
            variational_ngd_optimizer.zero_grad()
            hyperparameter_optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            minibatch_iter.set_postfix(loss=loss.item())
            loss.backward()
            variational_ngd_optimizer.step()
            vram_usage()
            hyperparameter_optimizer.step()

    uTime = time.time()-begin
    print("Time: ", time.time()-begin)
    
    model.eval()
    likelihood.eval()
    means = torch.tensor([0.])

    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        if gpu:
            observed_pred = likelihood(model(test_x.to('cuda')))
        else:
            observed_pred = likelihood(model(test_x))
    mem_diff = get_mem()-mem_begin
    peak_alloc, peak_reserved, gpu_used, sys_used = log_memory()
    means = observed_pred.mean.cpu()
    MSE = torch.mean((means - test_y.cpu())*(means - test_y.cpu()))
    mse_l_svgpci.append(MSE.item())
    time_l_svgpci.append(uTime)
    print(
        f"LOVE: Rep {i+1}: "
        f"MSE={MSE:.4f}, "
        f"Time={uTime:.2f}s, "
        f"RAM Δ={mem_diff:.2f}MB, "
        f"VRAM peak={peak_alloc:.2f}MB"
    )
    clear_gpu()

print(statistics.mean(mse_l_svgpci))
print(statistics.stdev(mse_l_svgpci))

print(statistics.mean(time_l_svgpci))
print(statistics.stdev(time_l_svgpci))

print(round(statistics.mean(mse_l_svgpci),5),round(statistics.stdev(mse_l_svgpci),5), round(statistics.mean(time_l_svgpci),5), round(statistics.stdev(time_l_svgpci),5))

# SVGP

In [None]:
my_batch_size = 3200

from torch.utils.data import TensorDataset, DataLoader
train_dataset = TensorDataset(train_x, train_y)#batch_size=1024
train_loader = DataLoader(train_dataset, batch_size=my_batch_size, shuffle=True)

test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=my_batch_size, shuffle=False)

from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy

class GPModel(ApproximateGP):
    def __init__(self, inducing_points):
        variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
        variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=False)
        super(GPModel, self).__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5))

        

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

mse_l_svgp = []
time_l_svgp = []

BATCH_SIZE = 32
TRAIN_SIZE, TEST_SIZE = 400_000, 20_000
for i in np.arange(0,n_replicates):
    train_x, train_y, test_x, test_y = splitter(
        all_data, n_train=TRAIN_SIZE, n_test=TEST_SIZE,
        random_state=42 + i, move_to_gpu=gpu
    )
    train_dataset = TensorDataset(train_x, train_y)
    train_loader = DataLoader(train_dataset, batch_size=my_batch_size, shuffle=True)
    test_dataset = TensorDataset(test_x, test_y)
    test_loader = DataLoader(test_dataset, batch_size=320, shuffle=False)
    mem_begin = get_mem()
    
    inducing_points = train_x[::500]
    model = GPModel(inducing_points=inducing_points)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    if torch.cuda.is_available():
        model = model.cuda()
        likelihood = likelihood.cuda()
    mem_diff = get_mem() - mem_begin
    print("RAM: ", mem_diff / (1024 ** 2))
    num_epochs = 5
    
    model.train()
    likelihood.train()
    
    optimizer = torch.optim.Adam([
        {'params': model.parameters()},
        {'params': likelihood.parameters()},
    ], lr=0.001)

    #mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    
    mem_diff = get_mem() - mem_begin
    print("RAM: ", mem_diff / (1024 ** 2))
    epochs_iter = tqdm.tqdm(range(num_epochs), desc="Epoch")
    
    begin = time.time()
    for i in tqdm.tqdm(range(num_epochs), leave = False, position = 0):
        minibatch_iter = tqdm.tqdm(train_loader, desc="Minibatch", leave=False, position = 0)
        for x_batch, y_batch in minibatch_iter:
            optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            loss.backward()
            max_ram = max(max_ram, (get_mem() - mem_begin))
            optimizer.step()
            if gpu:
                max_vram = max(max_vram, torch.cuda.memory_allocated())
        print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
            i + 1, num_epochs, loss.item(),
            model.covar_module.base_kernel.lengthscale.item(),
            likelihood.noise.item()
        ))
    uTime = time.time()-begin
    print("Time: ", time.time() - begin)
    mem_diff = get_mem() - mem_begin
    print("RAM: ", max_ram / (1024 ** 2))
    print("VRAM: ", max_vram / (1024 ** 2))
    
    model.eval()
    likelihood.eval()
    means = torch.tensor([0.])

     
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        if gpu:
            observed_pred = likelihood(model(test_x.to('cuda')))
        else:
            observed_pred = likelihood(model(test_x))
    mem_diff = get_mem()-mem_begin
    peak_alloc, peak_reserved, gpu_used, sys_used = log_memory()
    means = observed_pred.mean.cpu()
    MSE = torch.mean((means - test_y.cpu())*(means - test_y.cpu()))
    mse_l_svgp.append(MSE.item())
    time_l_svgp.append(uTime)
    print(
        f"LOVE: Rep {i+1}: "
        f"MSE={MSE:.4f}, "
        f"Time={uTime:.2f}s, "
        f"RAM Δ={mem_diff:.2f}MB, "
        f"VRAM peak={peak_alloc:.2f}MB"
    )
    clear_gpu()

print(statistics.mean(mse_l_svgp))
print(statistics.stdev(mse_l_svgp))

print(statistics.mean(time_l_svgp))
print(statistics.stdev(time_l_svgp))

print(round(statistics.mean(mse_l_svgp),5),round(statistics.stdev(mse_l_svgp),5), round(statistics.mean(time_l_svgp),5), round(statistics.stdev(time_l_svgp),5))

RAM:  2.391636371612549e-06
RAM:  2.391636371612549e-06


                                                           

KeyboardInterrupt: 

# SKI - Can only handle up to 40,000 datapoints before running out of memory

In [None]:

model = None
likelihood = None

if gpu:
    gc.collect()


In [None]:
train_x_ski = train_x[::2]
train_y_ski = train_y[::2]

if gpu:
    train_x_ski, train_y_ski = train_x_ski.cuda(), train_y_ski.cuda()

In [None]:
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel, GridInterpolationKernel
from gpytorch.distributions import MultivariateNormal


class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)

        grid_size = gpytorch.utils.grid.choose_grid_size(train_x, 1)

        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.GridInterpolationKernel(
                gpytorch.kernels.MaternKernel(nu=1.5), grid_size=grid_size, num_dims=2
            )
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


import os
smoke_test = ('CI' in os.environ)
training_iterations = 32


mse_l_ski = []
time_l_ski = []

# Config
BATCH_SIZE = 32
TRAIN_SIZE, TEST_SIZE = 40_000, 20_000
for i in np.arange(0,n_replicates):
    train_x, train_y, test_x, test_y = splitter(
        all_data, n_train=TRAIN_SIZE, n_test=TEST_SIZE,
        random_state=42 + i, move_to_gpu=gpu
    )
    train_dataset = TensorDataset(train_x, train_y)#batch_size=1024
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_dataset = TensorDataset(test_x, test_y)
    test_loader = DataLoader(test_dataset, batch_size=320, shuffle=False)
    mem_begin = get_mem()
    
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPRegressionModel(train_x, train_y, likelihood)
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    if gpu:
        mll = mll.cuda()

    
    training_iterations = 15
    begin = time.time()
    
    for i in tqdm.tqdm(range(training_iterations), desc="Train", leave = False, position = 0 ):
        optimizer.zero_grad()
        if gpu:
            max_vram = max(max_vram, torch.cuda.memory_allocated())
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        if gpu:
            max_vram = max(max_vram, torch.cuda.memory_allocated())
        optimizer.step()

    uTime = time.time()-begin
    print(time.time()-begin)
    print("RAM: ",(get_mem() - mem_begin)/(1024**2))
    print("VRAM: ", max_vram / (1024 ** 2))
    
    model.eval()
    with gpytorch.settings.prior_mode():
        output = (model(test_x))
    mem_diff = get_mem()-mem_begin
    peak_alloc, peak_reserved, gpu_used, sys_used = log_memory()
    means = output.mean.cpu()
    MSE = torch.mean((means - test_y.cpu())*(means - test_y.cpu()))
    mse_l_ski.append(MSE.item())
    time_l_ski.append(uTime)
    print(
        f"LOVE: Rep {i+1}: "
        f"MSE={MSE:.4f}, "
        f"Time={uTime:.2f}s, "
        f"RAM Δ={mem_diff:.2f}MB, "
        f"VRAM peak={peak_alloc:.2f}MB"
    )
    clear_gpu()

print(statistics.mean(mse_l_ski))
print(statistics.stdev(mse_l_ski))
print(statistics.mean(time_l_ski))
print(statistics.stdev(time_l_ski))
print(round(statistics.mean(mse_l_ski),5),round(statistics.stdev(mse_l_ski),5), round(statistics.mean(time_l_ski),5), round(statistics.stdev(time_l_ski),5))

  if nonzero_indices.storage():
  res = cls(index_tensor, value_tensor, interp_size)
  res = cls(index_tensor, value_tensor, interp_size)
                                                      

7.958143711090088
RAM:  0.0004656873643398285
VRAM:  32.3515625
[PyTorch] Max Allocated: 8291.03 MB | Max Reserved: 10416.00 MB
[GPU VRAM] Used (nvidia-smi): 12057.15 MB | [System RAM]: 23.40 GB
LOVE: Rep 15: MSE=0.1282, Time=7.96s, RAM Δ=488.31MB, VRAM peak=8291.03MB


                                                      

7.547093391418457
RAM:  1.4156103134155273e-07
VRAM:  32.81884765625
[PyTorch] Max Allocated: 8291.50 MB | Max Reserved: 10412.00 MB
[GPU VRAM] Used (nvidia-smi): 12015.09 MB | [System RAM]: 23.40 GB
LOVE: Rep 15: MSE=0.1275, Time=7.55s, RAM Δ=0.15MB, VRAM peak=8291.50MB


                                                     

KeyboardInterrupt: 

# VNN

In [9]:
clear_gpu()

In [None]:
my_batch_size = 32
smoke_test = False

import faiss
from torch.utils.data import TensorDataset, DataLoader
train_dataset = TensorDataset(train_x, train_y)#batch_size=1024
train_loader = DataLoader(train_dataset, batch_size=my_batch_size, shuffle=True)
test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=my_batch_size, shuffle=False)
from gpytorch.models import ApproximateGP
from gpytorch.variational.nearest_neighbor_variational_strategy import NNVariationalStrategy

class GPModel(ApproximateGP):
    def __init__(self, inducing_points, likelihood, k=256, training_batch_size=256):
        m, d = inducing_points.shape
        self.m = m
        self.k = k
        variational_distribution = gpytorch.variational.MeanFieldVariationalDistribution(m)
        if gpu:
            inducing_points = inducing_points.cuda()
        variational_strategy = NNVariationalStrategy(self, inducing_points, variational_distribution, k=k,
                                                     training_batch_size=training_batch_size)
        super(GPModel, self).__init__(variational_strategy)
        self.mean_module = gpytorch.means.ZeroMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=1.5, ard_num_dims=d))
        self.likelihood = likelihood

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        vram_usage()
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

    def __call__(self, x, prior=False, **kwargs):
        if x is not None:
            if x.dim() == 1:
                x = x.unsqueeze(-1)
        return self.variational_strategy(x=x, prior=False, **kwargs)

begin = time.time()
if smoke_test:
    k = 32
    training_batch_size = 32
else:
    k = 256
    training_batch_size = 64
k = 160
training_batch_size = 320*4
mse_l_vnn = []
time_l_vnn = []
my_batch_size = 32
TRAIN_SIZE, TEST_SIZE = 400_000, 20_000
for i in np.arange(0,n_replicates):
    train_x, train_y, test_x, test_y = splitter(
        all_data, n_train=TRAIN_SIZE, n_test=TEST_SIZE,
        random_state=42 + i, move_to_gpu=gpu
    )
    train_dataset = TensorDataset(train_x, train_y)#batch_size=1024
    train_loader = DataLoader(train_dataset, batch_size=my_batch_size, shuffle=True)

    test_dataset = TensorDataset(test_x, test_y)
    test_loader = DataLoader(test_dataset, batch_size=my_batch_size, shuffle=False)
    print("Replicate: ",i)
    mem_begin = get_mem()
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPModel(inducing_points=train_x[::1].contiguous(), likelihood=likelihood, k=k, training_batch_size=training_batch_size)
    if gpu:
        likelihood = likelihood.cuda()
        model = model.cuda()
    print(time.time()-begin)
    num_epochs = 1 if smoke_test else 20
    num_epochs = 10#30
    num_batches = model.variational_strategy._total_training_batches
    
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    
    begin = time.time()
    epochs_iter = tqdm.tqdm(range(num_epochs), desc="Epoch", leave=True, position = 0)
    for epoch in epochs_iter:
        minibatch_iter = tqdm.tqdm(range(num_batches), leave=True, position = 0)
    
        for i in minibatch_iter:
            optimizer.zero_grad()
            output = model(x=None)
            current_training_indices = model.variational_strategy.current_training_indices
            y_batch = train_y[...,current_training_indices]
            if gpu:
                y_batch = y_batch.cuda()
            loss = -mll(output, y_batch)
            minibatch_iter.set_postfix(loss=loss.item())
            loss.backward()
            vram_usage()
            optimizer.step()
    uTime = time.time() - begin
    print("Time: ", time.time() - begin)
    print("VRAM: ", max_vram/(1024 ** 2))
    print("RAM: ", (get_mem() - mem_begin)/(1024**2))
    
    model.eval()
    likelihood.eval()
    means = torch.tensor([0.])
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            preds = model(x_batch)
            means = torch.cat([means, preds.mean.cpu()])
    mem_diff = get_mem()-mem_begin
    peak_alloc, peak_reserved, gpu_used, sys_used = log_memory()
    means = means[1:]
    MSE = torch.mean((means - test_y.cpu())*(means - test_y.cpu()))
    mse_l_vnn.append(MSE.item())
    time_l_vnn.append(uTime)

    model = None
    likelihood = None
    mll = None
    optimizer = None
    epochs_iter = None
    if gpu:
        gc.collect()
    
    print(
        f"VNN: Rep {i+1}: "
        f"MSE={MSE:.4f}, "
        f"Time={uTime:.2f}s, "
        f"RAM Δ={mem_diff:.2f}MB, "
        f"VRAM peak={peak_alloc:.2f}MB"
    )
    clear_gpu()

print(statistics.mean(mse_l_vnn))
print(statistics.stdev(mse_l_vnn))
print(statistics.mean(time_l_vnn))
print(statistics.stdev(time_l_vnn))
print(round(statistics.mean(mse_l_vnn),5),round(statistics.stdev(mse_l_vnn),5), round(statistics.mean(time_l_vnn),5), round(statistics.stdev(time_l_vnn),5))

Replicate:  0
1
2


  x.storage().data_ptr() + x.storage_offset() * 4)


21
22
23
3
561.1702280044556


100%|██████████| 314/314 [00:06<00:00, 50.03it/s, loss=0.327]
100%|██████████| 314/314 [00:06<00:00, 49.18it/s, loss=0.0305]   
100%|██████████| 314/314 [00:06<00:00, 48.42it/s, loss=-0.376]
100%|██████████| 314/314 [00:05<00:00, 54.12it/s, loss=0.168]
100%|██████████| 314/314 [00:07<00:00, 43.79it/s, loss=-0.175]
100%|██████████| 314/314 [00:06<00:00, 52.22it/s, loss=-0.148] 
100%|██████████| 314/314 [00:06<00:00, 45.76it/s, loss=-0.11]  
100%|██████████| 314/314 [00:06<00:00, 47.13it/s, loss=-0.183] 
100%|██████████| 314/314 [00:05<00:00, 53.39it/s, loss=-0.0609]  
100%|██████████| 314/314 [00:07<00:00, 41.18it/s, loss=-0.127]
Epoch: 100%|██████████| 10/10 [01:05<00:00,  6.52s/it]


Time:  65.19755530357361
VRAM:  1673.62890625
RAM:  0.0006361007690429688
[PyTorch] Max Allocated: 1676.71 MB | Max Reserved: 1708.00 MB
[GPU VRAM] Used (nvidia-smi): 5176.22 MB | [System RAM]: 24.06 GB
VNN: Rep 314: MSE=0.1021, Time=65.20s, RAM Δ=662.16MB, VRAM peak=1676.71MB
Replicate:  1
1
2
21
22
23
3
616.663957118988


100%|██████████| 314/314 [00:06<00:00, 51.84it/s, loss=0.36] 
100%|██████████| 314/314 [00:05<00:00, 52.73it/s, loss=-0.011]   
100%|██████████| 314/314 [00:06<00:00, 51.31it/s, loss=-0.407]
100%|██████████| 314/314 [00:06<00:00, 50.79it/s, loss=0.188]
100%|██████████| 314/314 [00:06<00:00, 52.25it/s, loss=-0.254]
100%|██████████| 314/314 [00:06<00:00, 48.91it/s, loss=-0.142] 
100%|██████████| 314/314 [00:06<00:00, 50.57it/s, loss=-0.0883] 
100%|██████████| 314/314 [00:06<00:00, 50.77it/s, loss=-0.152] 
100%|██████████| 314/314 [00:06<00:00, 50.55it/s, loss=-0.0171]  
100%|██████████| 314/314 [00:06<00:00, 51.33it/s, loss=-0.226]
Epoch: 100%|██████████| 10/10 [01:01<00:00,  6.15s/it]


Time:  61.50834941864014
VRAM:  1673.62890625
RAM:  0.00023777782917022705
[PyTorch] Max Allocated: 1676.72 MB | Max Reserved: 1708.00 MB
[GPU VRAM] Used (nvidia-smi): 5123.16 MB | [System RAM]: 24.11 GB
VNN: Rep 314: MSE=0.1002, Time=61.51s, RAM Δ=245.66MB, VRAM peak=1676.72MB
Replicate:  2
1
2
21
22
23
3
576.545111656189


100%|██████████| 314/314 [00:06<00:00, 50.40it/s, loss=0.367]
100%|██████████| 314/314 [00:06<00:00, 47.55it/s, loss=0.0081]   
100%|██████████| 314/314 [00:06<00:00, 50.65it/s, loss=-0.332]
100%|██████████| 314/314 [00:07<00:00, 43.15it/s, loss=0.179]
100%|██████████| 314/314 [00:05<00:00, 53.15it/s, loss=-0.213]
100%|██████████| 314/314 [00:06<00:00, 47.18it/s, loss=-0.184] 
100%|██████████| 314/314 [00:06<00:00, 50.16it/s, loss=-0.0966] 
100%|██████████| 314/314 [00:05<00:00, 53.70it/s, loss=-0.159] 
100%|██████████| 314/314 [00:07<00:00, 43.89it/s, loss=-0.00476] 
100%|██████████| 314/314 [00:05<00:00, 53.96it/s, loss=-0.169]
Epoch: 100%|██████████| 10/10 [01:03<00:00,  6.40s/it]


Time:  63.982200384140015
VRAM:  1673.712890625
RAM:  0.00024169310927391052
[PyTorch] Max Allocated: 1676.86 MB | Max Reserved: 1708.00 MB
[GPU VRAM] Used (nvidia-smi): 5094.04 MB | [System RAM]: 24.05 GB
VNN: Rep 314: MSE=0.1001, Time=63.98s, RAM Δ=250.78MB, VRAM peak=1676.86MB
Replicate:  3
1
2
21
22
23
3
579.282312631607


100%|██████████| 314/314 [00:07<00:00, 43.91it/s, loss=0.452]
100%|██████████| 314/314 [00:05<00:00, 52.35it/s, loss=-0.0248]  
100%|██████████| 314/314 [00:07<00:00, 43.69it/s, loss=-0.404]
100%|██████████| 314/314 [00:06<00:00, 50.97it/s, loss=0.191]
100%|██████████| 314/314 [00:06<00:00, 51.10it/s, loss=-0.139]
100%|██████████| 314/314 [00:06<00:00, 47.49it/s, loss=-0.123]
100%|██████████| 314/314 [00:05<00:00, 54.75it/s, loss=-0.0166] 
100%|██████████| 314/314 [00:06<00:00, 47.05it/s, loss=-0.128]
100%|██████████| 314/314 [00:06<00:00, 51.07it/s, loss=-0.00026] 
100%|██████████| 314/314 [00:06<00:00, 46.81it/s, loss=-0.193]
Epoch: 100%|██████████| 10/10 [01:04<00:00,  6.45s/it]


Time:  64.55292177200317
VRAM:  1673.712890625
RAM:  0.0002416856586933136
[PyTorch] Max Allocated: 1676.69 MB | Max Reserved: 1708.00 MB
[GPU VRAM] Used (nvidia-smi): 5101.71 MB | [System RAM]: 24.11 GB
VNN: Rep 314: MSE=0.1014, Time=64.55s, RAM Δ=250.68MB, VRAM peak=1676.69MB
Replicate:  4
1
2
21
22
23
3
580.2057065963745


100%|██████████| 314/314 [00:06<00:00, 46.05it/s, loss=0.306]
100%|██████████| 314/314 [00:05<00:00, 55.14it/s, loss=0.0431]   
100%|██████████| 314/314 [00:06<00:00, 45.54it/s, loss=-0.395]
100%|██████████| 314/314 [00:06<00:00, 51.10it/s, loss=0.196]
100%|██████████| 314/314 [00:06<00:00, 50.16it/s, loss=-0.202]
100%|██████████| 314/314 [00:06<00:00, 47.11it/s, loss=-0.168] 
100%|██████████| 314/314 [00:05<00:00, 55.53it/s, loss=-0.0774]
100%|██████████| 314/314 [00:07<00:00, 44.10it/s, loss=-0.136]
100%|██████████| 314/314 [00:06<00:00, 49.31it/s, loss=-0.0497]  
100%|██████████| 314/314 [00:06<00:00, 45.92it/s, loss=-0.137]
Epoch: 100%|██████████| 10/10 [01:04<00:00,  6.45s/it]


Time:  64.51261234283447
VRAM:  1673.712890625
RAM:  0.0002418719232082367
[PyTorch] Max Allocated: 1676.86 MB | Max Reserved: 1708.00 MB
[GPU VRAM] Used (nvidia-smi): 5096.64 MB | [System RAM]: 24.05 GB
VNN: Rep 314: MSE=0.1028, Time=64.51s, RAM Δ=250.89MB, VRAM peak=1676.86MB
Replicate:  5
1
2
21
22
23
3
585.2091274261475


100%|██████████| 314/314 [00:07<00:00, 43.99it/s, loss=0.315]
100%|██████████| 314/314 [00:05<00:00, 53.78it/s, loss=-0.0033]  
100%|██████████| 314/314 [00:06<00:00, 49.54it/s, loss=-0.428]
100%|██████████| 314/314 [00:06<00:00, 48.41it/s, loss=0.171]
100%|██████████| 314/314 [00:05<00:00, 56.78it/s, loss=-0.207]
100%|██████████| 314/314 [00:06<00:00, 49.02it/s, loss=-0.0909]
100%|██████████| 314/314 [00:06<00:00, 52.25it/s, loss=-0.107] 
100%|██████████| 314/314 [00:06<00:00, 49.29it/s, loss=-0.138] 
100%|██████████| 314/314 [00:06<00:00, 45.56it/s, loss=-0.0254]  
100%|██████████| 314/314 [00:05<00:00, 58.01it/s, loss=-0.22] 
Epoch: 100%|██████████| 10/10 [01:02<00:00,  6.24s/it]


Time:  62.447962284088135
VRAM:  1673.71484375
RAM:  0.0002416558563709259
[PyTorch] Max Allocated: 1676.86 MB | Max Reserved: 1708.00 MB
[GPU VRAM] Used (nvidia-smi): 5059.91 MB | [System RAM]: 24.08 GB
VNN: Rep 314: MSE=0.1008, Time=62.45s, RAM Δ=250.65MB, VRAM peak=1676.86MB
Replicate:  6
1
2
21
22
23
3
585.0397419929504


100%|██████████| 314/314 [00:04<00:00, 66.92it/s, loss=0.348]
100%|██████████| 314/314 [00:04<00:00, 69.36it/s, loss=-0.000761]
100%|██████████| 314/314 [00:04<00:00, 68.61it/s, loss=-0.363]
100%|██████████| 314/314 [00:04<00:00, 67.47it/s, loss=0.179]
100%|██████████| 314/314 [00:04<00:00, 68.49it/s, loss=-0.208]
100%|██████████| 314/314 [00:04<00:00, 69.30it/s, loss=-0.187] 
100%|██████████| 314/314 [00:04<00:00, 69.47it/s, loss=-0.118] 
100%|██████████| 314/314 [00:04<00:00, 69.05it/s, loss=-0.194] 
100%|██████████| 314/314 [00:04<00:00, 67.46it/s, loss=-0.0166] 
100%|██████████| 314/314 [00:04<00:00, 69.42it/s, loss=-0.176]
Epoch: 100%|██████████| 10/10 [00:45<00:00,  4.58s/it]


Time:  45.82531785964966
VRAM:  1673.71484375
RAM:  0.0002415888011455536
[PyTorch] Max Allocated: 1676.77 MB | Max Reserved: 1706.00 MB
[GPU VRAM] Used (nvidia-smi): 5241.78 MB | [System RAM]: 24.34 GB
VNN: Rep 314: MSE=0.1021, Time=45.83s, RAM Δ=250.58MB, VRAM peak=1676.77MB
Replicate:  7
1
2


# Compile Table (MSE and Time only)

SKI
SGPR
LOVE
DKL
SVGP-CI
SVGP
NGD
VNN


In [None]:
print("SKI     --- MSE:",statistics.mean(mse_l_ski), "(",statistics.stdev(mse_l_ski),")  Time:", statistics.mean(time_l_ski), "(",statistics.stdev(time_l_ski),")")
print("SGPR    --- MSE:",statistics.mean(mse_l_sgpr), "(",statistics.stdev(mse_l_sgpr),")  Time:", statistics.mean(time_l_sgpr), "(",statistics.stdev(time_l_sgpr),")")
print("LOVE    --- MSE:",statistics.mean(mse_l_love), "(",statistics.stdev(mse_l_love),")  Time:", statistics.mean(time_l_love), "(",statistics.stdev(time_l_love),")")
print("DKL     --- MSE:",statistics.mean(mse_l_dkl), "(",statistics.stdev(mse_l_dkl),")  Time:", statistics.mean(time_l_dkl), "(",statistics.stdev(time_l_dkl),")")
print("SVGP-CI --- MSE:",statistics.mean(mse_l_svgpci), "(",statistics.stdev(mse_l_svgpci),")  Time:", statistics.mean(time_l_svgpci), "(",statistics.stdev(time_l_svgpci),")")
print("SVGP    --- MSE:",statistics.mean(mse_l_svgp), "(",statistics.stdev(mse_l_svgp),")  Time:", statistics.mean(time_l_svgp), "(",statistics.stdev(time_l_svgp),")")
print("NGD     --- MSE:",statistics.mean(mse_l_ngd), "(",statistics.stdev(mse_l_ngd),")  Time:", statistics.mean(time_l_ngd), "(",statistics.stdev(time_l_ngd),")")
print("VNN     --- MSE:",statistics.mean(mse_l_vnn), "(",statistics.stdev(mse_l_vnn),")  Time:", statistics.mean(time_l_vnn), "(",statistics.stdev(time_l_vnn),")")

Reordering
SVGP
SVGP-CI
VNN
NGD
DKL
SGPR
SKI
LOVE

In [None]:
print("SVGP    --- MSE:",statistics.mean(mse_l_svgp), "(",statistics.stdev(mse_l_svgp),")  Time:", statistics.mean(time_l_svgp), "(",statistics.stdev(time_l_svgp),")")
print("SVGP-CI --- MSE:",statistics.mean(mse_l_svgpci), "(",statistics.stdev(mse_l_svgpci),")  Time:", statistics.mean(time_l_svgpci), "(",statistics.stdev(time_l_svgpci),")")
print("VNN     --- MSE:",statistics.mean(mse_l_vnn), "(",statistics.stdev(mse_l_vnn),")  Time:", statistics.mean(time_l_vnn), "(",statistics.stdev(time_l_vnn),")")
print("NGD     --- MSE:",statistics.mean(mse_l_ngd), "(",statistics.stdev(mse_l_ngd),")  Time:", statistics.mean(time_l_ngd), "(",statistics.stdev(time_l_ngd),")")
print("DKL     --- MSE:",statistics.mean(mse_l_dkl), "(",statistics.stdev(mse_l_dkl),")  Time:", statistics.mean(time_l_dkl), "(",statistics.stdev(time_l_dkl),")")
print("SGPR    --- MSE:",statistics.mean(mse_l_sgpr), "(",statistics.stdev(mse_l_sgpr),")  Time:", statistics.mean(time_l_sgpr), "(",statistics.stdev(time_l_sgpr),")")
print("SKI     --- MSE:",statistics.mean(mse_l_ski), "(",statistics.stdev(mse_l_ski),")  Time:", statistics.mean(time_l_ski), "(",statistics.stdev(time_l_ski),")")
print("LOVE    --- MSE:",statistics.mean(mse_l_love), "(",statistics.stdev(mse_l_love),")  Time:", statistics.mean(time_l_love), "(",statistics.stdev(time_l_love),")")

In [None]:
import statistics

methods = [
    ("SVGP",   mse_l_svgp,   time_l_svgp),
    ("SVGP-CI",mse_l_svgpci, time_l_svgpci),
    ("NGD",    mse_l_ngd,    time_l_ngd),
    ("DKL",    mse_l_dkl,    time_l_dkl),
    ("SGPR",   mse_l_sgpr,   time_l_sgpr),
    ("SKI",    mse_l_ski,    time_l_ski),
    ("LOVE",   mse_l_love,   time_l_love),
]

for name, mse_list, time_list in methods:
    data_mse  = mse_list[1:]
    data_time = time_list[1:]

    mean_mse  = statistics.mean(data_mse)
    sd_mse    = statistics.stdev(data_mse)
    mean_time = statistics.mean(data_time)
    sd_time   = statistics.stdev(data_time)
    print(f"{name:<8} & {mean_mse:.4f}  & ({sd_mse:.4f} )  & {mean_time:.4f}  & ({sd_time:.4f} )")
