In this notebook, we compare the time it takes to fit LCEGP with the time it takes to
fit the UnknownCorrelationModel.

- We want to see how the fit time changes as a function of the number of alternatives.
- We want to compare both fitting the model from scratch and Bayesian updates
with added data. For this, we can look into periodically re-training the LCEGP
hyper-parameters, and how that frequency affects the run times.

Let's start by defining some utilities.

# TODO: add runtime comparison with a KG run

In [1]:
import resource
import time

import torch
from contextual_rs.models.custom_fit import custom_fit_gpytorch_model
from gpytorch import ExactMarginalLogLikelihood
from gpytorch.distributions import MultivariateNormal

from contextual_rs.models.lce_gp import LCEGP
from contextual_rs.models.unknown_correlation_model import UnknownCorrelationModel


# Tensor construction kwargs used by the utilities in this notebook.
ckwargs = dict(dtype=torch.double, device="cpu")
# TODO: consider CUDA as well


def clock2():
    """
    clock2() -> (t_user, t_system)

    Return the user and system CPU times consumed so far by the current
    process, as reported by `resource.getrusage(resource.RUSAGE_SELF)`.
    """
    usage = resource.getrusage(resource.RUSAGE_SELF)
    return usage.ru_utime, usage.ru_stime


def model_constructor(model_type: str):
    """
    Return a factory `model_create(X, Y, emb_dim)` for the requested model.

    If `model_type` is "LCEGP", the factory builds an LCEGP on column-shaped
    `X` / `Y`, fits it through its exact marginal log likelihood and returns it.
    For any other value, the factory returns an UnknownCorrelationModel with
    `update_method=model_type`; `emb_dim` is ignored in that case.
    """

    def model_create(X, Y, emb_dim):
        # Everything that is not LCEGP is handled by UnknownCorrelationModel.
        if model_type != "LCEGP":
            return UnknownCorrelationModel(X, Y, update_method=model_type)

        gp = LCEGP(
            X.view(-1, 1),
            Y.view(-1, 1),
            categorical_cols=[0],
            embs_dim_list=[emb_dim],
        )
        custom_fit_gpytorch_model(ExactMarginalLogLikelihood(gp.likelihood, gp))
        return gp

    return model_create


def get_sampling_mvn(
    K: int,
    rho: float,
) -> MultivariateNormal:
    r"""
    Build the multivariate normal that the benchmark data is sampled from.

    The mean is `K` evenly spaced values between 0 and 1, and the covariance
    has entries `rho ** |i - j|` for alternatives `i` and `j`.

    Args:
        K: Number of alternatives, i.e., the dimension of the distribution.
        rho: Base of the covariance; entry (i, j) is `rho` to the power |i - j|.

    Returns:
        A MultivariateNormal with the mean and covariance described above,
        with tensors created using `ckwargs`.
    """
    true_mean = torch.linspace(0, 1, K, **ckwargs)
    # Compute all |i - j| lags at once via broadcasting instead of filling the
    # matrix entry by entry in a Python double loop (which allocates K^2 tensors).
    idx = torch.arange(K, **ckwargs)
    lags = (idx.unsqueeze(-1) - idx.unsqueeze(0)).abs()
    true_cov = torch.tensor(rho, **ckwargs).pow(lags)
    return MultivariateNormal(true_mean, true_cov)


def fit_on_random_data(
    num_alternatives: int,
    num_train: int,
    num_full_train: int,
    model: str,
    replications: int,
    rho: float,
    emb_dim: int = 1,
):
    r"""
    Time the construction (and fitting, where applicable) of a model on
    randomly generated data, averaged over several replications.

    In each replication the training inputs consist of `num_full_train` passes
    over all alternatives followed by `num_train` alternatives drawn uniformly
    at random. The observations are drawn from the distribution returned by
    `get_sampling_mvn(num_alternatives, rho)`. Only the call to the model
    factory is timed; data generation is excluded.

    Args:
        num_alternatives: Number of alternatives.
        num_train: Number of observations at randomly chosen alternatives.
        num_full_train: Number of full passes over all alternatives.
        model: Model type, forwarded to `model_constructor`.
        replications: Number of replications; replication `i` uses seed `i`.
        rho: Covariance parameter, forwarded to `get_sampling_mvn`.
        emb_dim: Embedding dimension, forwarded to the model factory.

    Returns:
        A tuple of 0-dim tensors with the averages of wall-time, user cpu-time,
        sys cpu-time, and total cpu-time.
    """
    wall_times = torch.zeros(replications, **ckwargs)
    user_cts = torch.zeros(replications, **ckwargs)
    sys_cts = torch.zeros(replications, **ckwargs)
    # The factory only depends on `model`, so build it once.
    model_create = model_constructor(model)
    for seed in range(replications):
        torch.manual_seed(seed)
        full_train = torch.arange(num_alternatives, **ckwargs).repeat(num_full_train)
        rand_train = torch.randint(0, num_alternatives, (num_train,), **ckwargs)
        train_X = torch.cat([full_train, rand_train])
        mvn = get_sampling_mvn(num_alternatives, rho)
        # One joint draw over all alternatives per row.
        all_Y = mvn.rsample(torch.Size([num_full_train + num_train])).detach()
        # The first rows are used in full, matching the layout of `full_train`.
        full_Y = all_Y[:num_full_train].view(-1)
        # From each remaining row keep only the randomly chosen alternative.
        rand_Y = all_Y[num_full_train:].gather(
            dim=-1, index=rand_train.view(-1, 1).long()
        ).view(-1)
        train_Y = torch.cat([full_Y, rand_Y])
        # CUDA kernels run asynchronously; wait for pending work so that the
        # timers bracket exactly the model construction.
        if train_X.is_cuda:
            torch.cuda.synchronize()
        # perf_counter is monotonic and high-resolution, unlike time.time.
        wt_start = time.perf_counter()
        ct_start = clock2()
        model_create(train_X, train_Y, emb_dim)
        if train_X.is_cuda:
            torch.cuda.synchronize()
        ct_end = clock2()
        wt_end = time.perf_counter()
        wall_times[seed] = wt_end - wt_start
        user_cts[seed] = ct_end[0] - ct_start[0]
        sys_cts[seed] = ct_end[1] - ct_start[1]
    return (
        wall_times.mean(),
        user_cts.mean(),
        sys_cts.mean(),
        user_cts.mean() + sys_cts.mean()
    )

We are now ready to run the simplest experiments: measuring the time it takes to fit
each model from scratch.

In [2]:
# (model type, embedding dimension) pairs to benchmark.
alternatives = [
    ("LCEGP", 1),
    ("LCEGP", 2),
    ("moment-matching", 0),
    ("KL", 0),
    ("moment-KL", 0),
]

In [3]:
# Setting: 5 alternatives, 2 full passes plus 10 random observations.
kwargs = dict(
    num_alternatives=5,
    num_train=10,
    num_full_train=2,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )

Running model LCEGP, emb_dim 1:
Wall time 0.15, user time 0.60, sys time 0.00, total cpu time 0.60
Running model LCEGP, emb_dim 2:
Wall time 0.21, user time 0.85, sys time 0.00, total cpu time 0.86
Running model moment-matching, emb_dim 0:
Wall time 0.00, user time 0.01, sys time 0.00, total cpu time 0.01
Running model KL, emb_dim 0:
Wall time 0.00, user time 0.01, sys time 0.00, total cpu time 0.01
Running model moment-KL, emb_dim 0:
Wall time 0.00, user time 0.01, sys time 0.00, total cpu time 0.01


In [4]:
# Setting: 5 alternatives, 4 full passes plus 30 random observations.
kwargs = dict(
    num_alternatives=5,
    num_train=30,
    num_full_train=4,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )


Running model LCEGP, emb_dim 1:
Wall time 0.13, user time 0.56, sys time 0.01, total cpu time 0.56
Running model LCEGP, emb_dim 2:
Wall time 0.18, user time 0.79, sys time 0.01, total cpu time 0.81
Running model moment-matching, emb_dim 0:
Wall time 0.01, user time 0.01, sys time 0.00, total cpu time 0.01
Running model KL, emb_dim 0:
Wall time 0.00, user time 0.01, sys time 0.00, total cpu time 0.01
Running model moment-KL, emb_dim 0:
Wall time 0.01, user time 0.01, sys time 0.00, total cpu time 0.01


In [5]:
# Setting: 5 alternatives, 10 full passes plus 100 random observations.
kwargs = dict(
    num_alternatives=5,
    num_train=100,
    num_full_train=10,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )


Running model LCEGP, emb_dim 1:
Wall time 0.22, user time 1.10, sys time 0.01, total cpu time 1.10
Running model LCEGP, emb_dim 2:
Wall time 0.49, user time 2.46, sys time 0.02, total cpu time 2.48
Running model moment-matching, emb_dim 0:
Wall time 0.01, user time 0.02, sys time 0.00, total cpu time 0.02
Running model KL, emb_dim 0:
Wall time 0.01, user time 0.02, sys time 0.00, total cpu time 0.02
Running model moment-KL, emb_dim 0:
Wall time 0.01, user time 0.02, sys time 0.00, total cpu time 0.02


In [6]:
# Setting: 5 alternatives, 40 full passes plus 400 random observations.
kwargs = dict(
    num_alternatives=5,
    num_train=400,
    num_full_train=40,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )


Running model LCEGP, emb_dim 1:
Wall time 2.00, user time 11.98, sys time 0.13, total cpu time 12.11
Running model LCEGP, emb_dim 2:
Wall time 3.94, user time 23.22, sys time 0.44, total cpu time 23.66
Running model moment-matching, emb_dim 0:
Wall time 0.03, user time 0.03, sys time 0.00, total cpu time 0.03
Running model KL, emb_dim 0:
Wall time 0.02, user time 0.03, sys time 0.00, total cpu time 0.03
Running model moment-KL, emb_dim 0:
Wall time 0.03, user time 0.03, sys time 0.00, total cpu time 0.03


In [7]:
# Setting: 20 alternatives, 5 full passes plus 100 random observations.
kwargs = dict(
    num_alternatives=20,
    num_train=100,
    num_full_train=5,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )


Running model LCEGP, emb_dim 1:
Wall time 2.59, user time 14.22, sys time 0.08, total cpu time 14.30
Running model LCEGP, emb_dim 2:
Wall time 4.13, user time 22.47, sys time 0.13, total cpu time 22.60
Running model moment-matching, emb_dim 0:
Wall time 0.03, user time 0.04, sys time 0.00, total cpu time 0.04
Running model KL, emb_dim 0:
Wall time 0.02, user time 0.03, sys time 0.00, total cpu time 0.03
Running model moment-KL, emb_dim 0:
Wall time 0.03, user time 0.04, sys time 0.00, total cpu time 0.04


In [8]:
# Setting: 20 alternatives, 10 full passes plus 500 random observations.
kwargs = dict(
    num_alternatives=20,
    num_train=500,
    num_full_train=10,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )

Running model LCEGP, emb_dim 1:
Wall time 19.48, user time 117.29, sys time 1.54, total cpu time 118.83
Running model LCEGP, emb_dim 2:
Wall time 24.27, user time 146.79, sys time 1.16, total cpu time 147.95
Running model moment-matching, emb_dim 0:
Wall time 0.07, user time 0.08, sys time 0.00, total cpu time 0.08
Running model KL, emb_dim 0:
Wall time 0.04, user time 0.05, sys time 0.00, total cpu time 0.05
Running model moment-KL, emb_dim 0:
Wall time 0.07, user time 0.08, sys time 0.00, total cpu time 0.08


In [9]:
# Setting: 50 alternatives, 5 full passes plus 100 random observations.
kwargs = dict(
    num_alternatives=50,
    num_train=100,
    num_full_train=5,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )

Running model LCEGP, emb_dim 1:
Wall time 14.42, user time 86.68, sys time 0.59, total cpu time 87.26
Running model LCEGP, emb_dim 2:
Wall time 15.33, user time 91.71, sys time 0.59, total cpu time 92.30
Running model moment-matching, emb_dim 0:
Wall time 0.05, user time 0.06, sys time 0.00, total cpu time 0.06
Running model KL, emb_dim 0:
Wall time 0.03, user time 0.04, sys time 0.00, total cpu time 0.04
Running model moment-KL, emb_dim 0:
Wall time 0.04, user time 0.05, sys time 0.00, total cpu time 0.05


In [10]:
# Setting: 50 alternatives, 10 full passes plus 500 random observations.
kwargs = dict(
    num_alternatives=50,
    num_train=500,
    num_full_train=10,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )


Running model LCEGP, emb_dim 1:
Wall time 104.55, user time 639.93, sys time 2.67, total cpu time 642.60
Running model LCEGP, emb_dim 2:
Wall time 79.06, user time 482.45, sys time 3.97, total cpu time 486.42
Running model moment-matching, emb_dim 0:
Wall time 0.14, user time 0.15, sys time 0.00, total cpu time 0.15
Running model KL, emb_dim 0:
Wall time 0.07, user time 0.08, sys time 0.00, total cpu time 0.08
Running model moment-KL, emb_dim 0:
Wall time 0.13, user time 0.14, sys time 0.00, total cpu time 0.14


In [4]:
# Rebind the tensor construction kwargs so that tensors are created on CUDA.
ckwargs = dict(dtype=torch.double, device="cuda")

In [5]:
# Setting: 50 alternatives, 10 full passes plus 500 random observations.
kwargs = dict(
    num_alternatives=50,
    num_train=500,
    num_full_train=10,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )

Running model LCEGP, emb_dim 1:
Wall time 1.45, user time 2.18, sys time 0.10, total cpu time 2.28
Running model LCEGP, emb_dim 2:
Wall time 1.43, user time 2.18, sys time 0.09, total cpu time 2.27
Running model moment-matching, emb_dim 0:
Wall time 0.39, user time 0.39, sys time 0.00, total cpu time 0.39
Running model KL, emb_dim 0:
Wall time 0.18, user time 0.17, sys time 0.00, total cpu time 0.17
Running model moment-KL, emb_dim 0:
Wall time 0.36, user time 0.36, sys time 0.00, total cpu time 0.36


In [6]:
# Setting: 200 alternatives, 10 full passes plus 500 random observations.
kwargs = dict(
    num_alternatives=200,
    num_train=500,
    num_full_train=10,
    replications=30,
    rho=0.5,
)

for model, emb_dim in alternatives:
    print(f"Running model {model}, emb_dim {emb_dim}:")
    wt, uct, sct, tct = fit_on_random_data(model=model, emb_dim=emb_dim, **kwargs)
    print(
        f"Wall time {float(wt):.2f}, user time {float(uct):.2f}, "
        f"sys time {float(sct):.2f}, total cpu time {float(tct):.2f}"
    )

Running model LCEGP, emb_dim 1:
Wall time 22.42, user time 25.36, sys time 0.25, total cpu time 25.61
Running model LCEGP, emb_dim 2:
Wall time 17.21, user time 19.46, sys time 0.19, total cpu time 19.65
Running model moment-matching, emb_dim 0:
Wall time 0.62, user time 0.64, sys time 0.00, total cpu time 0.64
Running model KL, emb_dim 0:
Wall time 0.30, user time 0.31, sys time 0.00, total cpu time 0.31
Running model moment-KL, emb_dim 0:
Wall time 0.57, user time 0.58, sys time 0.00, total cpu time 0.58
