In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import torch
import gpytorch

from tqdm import tqdm

# The repository root sits two directory levels above the current working
# directory; put it on sys.path so the project-local `utils` package imports.
_cwd_parent = os.path.dirname(os.getcwd())
project_dir = os.path.dirname(_cwd_parent)
if project_dir not in sys.path:
    sys.path.append(project_dir)

from utils.data import load_data
from utils.tool import prediction_summary

# Prepare Data

In [2]:
# load_data() is called with the repository root as the working directory
# (presumably it resolves its data paths relative to the root -- confirm in utils.data).
# Remember where the notebook was running so that exact directory is restored,
# instead of hard-coding a sibling baseline folder.
notebook_dir = os.getcwd()
os.chdir(project_dir)
try:
    train_loader, val_loader, test_loader = load_data()
finally:
    # change dir back, even when loading fails, so later cells see the original cwd
    os.chdir(notebook_dir)

File1: 3BAGEmnnQ2K4zF49Dkkoxg.csv contains missing hours
File4: 4XEJFVFOS761cvyEjOYf0g.csv contains outliers
File5: 6kzhfU9xTKCUVJMz492l2g.csv contains outliers
File6: 6nBLCf6WT06TOuUExPkBtA.csv contains missing hours
File17: JQ1px-xqQx-xKh3Oa5h9nA.csv contains missing hours
File21: OfAvTbS1SiOjQo4WKSAP9g.csv contains missing hours
File24: R2ebpAblQHylOjteA-2hlQ.csv contains missing hours
File37: jDYxIP2JQL2br5aTIAR7JQ.csv contains outliers
File38: kyRUtBOTTaK7V_-dxOJTwg.csv contains outliers
File45: wSo2iRgjT36eWC4a2joWZg.csv contains outliers


In [4]:
# Split the test dataset's sensors into the two index sets it carries:
# `train_idx` (used below as GP training inputs) and `test_idx` (held out for scoring).
# Locations are indexed by row, readings by column.
_test_ds = test_loader.dataset
_idx_m, _idx_u = _test_ds.train_idx, _test_ds.test_idx

X_m, Y_m = _test_ds.locations[_idx_m, :], _test_ds.readings[:, _idx_m]
print(X_m.shape, Y_m.shape)

X_u, Y_u = _test_ds.locations[_idx_u, :], _test_ds.readings[:, _idx_u]
print(X_u.shape, Y_u.shape)

# The first axis of the readings is iterated as the time-step axis further down.
n_steps = Y_m.shape[0]
print(n_steps)

torch.Size([28, 2]) torch.Size([2928, 28])
torch.Size([9, 2]) torch.Size([2928, 9])
2928


# Interpolation Algorithm

In [5]:
class LocalPeriodicKernel(gpytorch.kernels.Kernel):
    """Locally periodic kernel: the product of a PeriodicKernel and an RBFKernel.

    Args:
        lp_ard: Optional number of ARD dimensions. When given, it is forwarded as
            ``ard_num_dims`` to both the periodic and the RBF component; when
            ``None`` both components are built with gpytorch's defaults.
        **kwargs: Passed through to ``gpytorch.kernels.Kernel.__init__``.
    """

    is_stationary = True

    def __init__(self, lp_ard=None, **kwargs):
        super().__init__(**kwargs)
        if lp_ard is not None:
            # The keyword must be spelled `ard_num_dims`; the previous
            # `arg_num_dims` spelling never reached the ARD setting, so
            # `lp_ard` had no effect on the component kernels.
            self.periodickernel = gpytorch.kernels.PeriodicKernel(ard_num_dims=lp_ard)
            self.rbfkernel = gpytorch.kernels.RBFKernel(ard_num_dims=lp_ard)
        else:
            # Keep this branch separate: the components are constructed without
            # the keyword at all rather than with an explicit ard_num_dims=None.
            self.periodickernel = gpytorch.kernels.PeriodicKernel()
            self.rbfkernel = gpytorch.kernels.RBFKernel()
        self.localperiodickernel = self.periodickernel * self.rbfkernel

    def forward(self, x1, x2, **params):
        """Evaluate the periodic * RBF product kernel between ``x1`` and ``x2``."""
        return self.localperiodickernel(x1, x2, **params)
    
class BaseKernel(gpytorch.kernels.Kernel):
    """Product of a Matern(nu=0.5) kernel and a LocalPeriodicKernel on split features.

    The first four columns of the last (feature) axis are fed to the locally
    periodic kernel; all remaining columns are fed to the Matern kernel.

    Args:
        matern_ard: Optional ``ard_num_dims`` for the Matern kernel.
        lp_ard: Optional ARD dimension count forwarded to ``LocalPeriodicKernel``.
        **kwargs: Passed through to ``gpytorch.kernels.Kernel.__init__``.
    """

    def __init__(self, matern_ard=None, lp_ard=None, **kwargs):
        super().__init__(**kwargs)
        if matern_ard is not None:
            self.maternkernel = gpytorch.kernels.MaternKernel(nu=0.5, ard_num_dims=matern_ard)
        else:
            self.maternkernel = gpytorch.kernels.MaternKernel(nu=0.5)
        if lp_ard is not None:
            self.localperiodickernel = LocalPeriodicKernel(lp_ard=lp_ard)
        else:
            self.localperiodickernel = LocalPeriodicKernel()

    def forward(self, x1, x2, **params):
        """Return the Matern kernel times the locally periodic kernel for ``x1``, ``x2``."""
        # Separate the input into periodic and continuous components.
        # Slice the LAST axis with `...` so that inputs carrying leading batch
        # dimensions are split along features too; for plain (n, d) inputs this
        # is identical to `x[:, :4]` / `x[:, 4:]`.
        x1_per = x1[..., :4]
        x1_cont = x1[..., 4:]
        x2_per = x2[..., :4]
        x2_cont = x2[..., 4:]
        return self.maternkernel(x1_cont, x2_cont, **params) * self.localperiodickernel(x1_per, x2_per, **params)

class GlobalKernel(gpytorch.kernels.Kernel):
    """A ``BaseKernel`` wrapped in a ``ScaleKernel``.

    Args:
        matern_ard: Forwarded to ``BaseKernel`` as ``matern_ard``.
        lp_ard: Forwarded to ``BaseKernel`` as ``lp_ard``.
        **kwargs: Passed through to ``gpytorch.kernels.Kernel.__init__``.
    """

    is_stationary = True

    def __init__(self, matern_ard=None, lp_ard=None, **kwargs):
        super(GlobalKernel, self).__init__(**kwargs)

        # Build the unscaled kernel once, keep a handle to it, then wrap it.
        base = BaseKernel(matern_ard=matern_ard, lp_ard=lp_ard)
        self.basekernel = base
        self.scalekernel = gpytorch.kernels.ScaleKernel(base)

    def forward(self, x1, x2, **params):
        """Evaluate the scaled base kernel between ``x1`` and ``x2``."""
        scaled = self.scalekernel(x1, x2, **params)
        return scaled
    

class AirGP(gpytorch.models.ExactGP):
    """Exact GP with a constant mean and a ``GlobalKernel`` covariance.

    Args:
        train_x: Training inputs handed to ``ExactGP``.
        train_y: Training targets handed to ``ExactGP``.
        likelihood: Likelihood handed to ``ExactGP``.
        matern_ard: Forwarded to ``GlobalKernel``.
        lp_ard: Forwarded to ``GlobalKernel``.
    """

    def __init__(self, train_x, train_y, likelihood, matern_ard=None, lp_ard=None):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = GlobalKernel(matern_ard=matern_ard, lp_ard=lp_ard)

    def forward(self, x):
        """Return the multivariate normal given by the mean and covariance at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

In [6]:
class GP_Model(gpytorch.models.ExactGP):
    """Exact GP with a constant mean and a scaled Matern(nu=0.5) kernel.

    The Matern kernel is built with ``ard_num_dims=2``, i.e. one lengthscale per
    input column for two-column inputs.

    Args:
        train_x: Training inputs handed to ``ExactGP``.
        train_y: Training targets handed to ``ExactGP``.
        likelihood: Likelihood handed to ``ExactGP``.
    """

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        matern = gpytorch.kernels.MaternKernel(nu=0.5, ard_num_dims=2)
        self.covar_module = gpytorch.kernels.ScaleKernel(matern)

    def forward(self, x):
        """Return the multivariate normal given by the mean and covariance at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

In [7]:
# Fit one independent exact GP per time step on the (X_m, Y_m[i]) sensors and
# predict the held-out sensors X_u for that same time step.
training_iter = 10000    # upper bound on optimizer steps per time step
patience = 500           # number of recent losses kept for early stopping
min_improvement = 1e-3   # stop once the loss dropped by less than this across the window

Y_true = []
Y_pred = []
for i in range(n_steps):
    X_train = X_m
    Y_train = Y_m[i, :]

    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GP_Model(X_train, Y_train, likelihood)
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    try:
        # training
        recent_losses = []
        # `step` instead of `iter`: the loop variable leaks into the notebook
        # namespace and must not shadow the builtin.
        for step in (pbar := tqdm(range(training_iter))):
            optimizer.zero_grad()
            output = model(X_train)
            loss = -mll(output, Y_train)
            loss.backward()
            optimizer.step()
            loss_value = loss.item()
            pbar.set_description(f"Time step: {i}  Loss: {loss_value:.6f}")

            # early stopping: once more than `patience` losses have been seen,
            # keep only the latest `patience` and stop when the oldest and the
            # newest of them differ by less than `min_improvement`.
            recent_losses.append(loss_value)
            if len(recent_losses) > patience:
                recent_losses.pop(0)
                if recent_losses[0] - recent_losses[-1] < min_improvement:
                    break

        # evaluation
        model.eval()
        likelihood.eval()
        with torch.no_grad():
            test_x = X_u
            pred = likelihood(model(test_x))
            pred_mean = pred.mean
            Y_true.append(Y_u[i, :])
            Y_pred.append(pred_mean)
    except Exception as err:
        # Skip time steps whose fit fails, but report why. Catching Exception
        # (not a bare except) lets KeyboardInterrupt stop the whole loop.
        print(f"Time step {i} failed: {err!r}")

if not Y_pred:
    # torch.cat on an empty list fails with an unrelated-looking message.
    raise RuntimeError("GP fitting failed for every time step; nothing to evaluate")

# Concatenate the per-step vectors into flat arrays for the summary helper.
Y_true = torch.cat(Y_true, dim=0).numpy()
Y_pred = torch.cat(Y_pred, dim=0).numpy()

prediction_summary(Y_true, Y_pred, True)


Time step: 0  Loss: 1.753324:  29%|██▊       | 2874/10000 [00:05<00:14, 505.91it/s]
Time step: 1  Loss: 1.794676:  35%|███▌      | 3512/10000 [00:07<00:13, 492.45it/s]
Time step: 2  Loss: 1.940947:  20%|██        | 2049/10000 [00:04<00:17, 463.42it/s]
Time step: 3  Loss: 1.897622:  39%|███▊      | 3855/10000 [00:07<00:12, 494.68it/s]
Time step: 4  Loss: 3.181818:  87%|████████▋ | 8681/10000 [00:17<00:02, 484.93it/s]
Time step: 5  Loss: 2.224248:  68%|██████▊   | 6796/10000 [00:14<00:06, 482.40it/s]
Time step: 6  Loss: 2.786995:  75%|███████▍  | 7457/10000 [00:15<00:05, 488.55it/s]
Time step: 7  Loss: 2.627750:  52%|█████▏    | 5237/10000 [00:10<00:09, 490.19it/s]
Time step: 8  Loss: 2.223839:  38%|███▊      | 3808/10000 [00:08<00:11, 523.89it/s]

In [19]:
# Show the training targets assigned in the most recent iteration of the
# per-time-step loop above.
# NOTE(review): this depends on a loop variable leaking from the training cell and
# carries an out-of-order execution count -- looks like leftover debugging; confirm
# whether it should be kept.
Y_train

tensor([10.6320,  7.3957, 13.9568, 12.1435,  8.6852,  6.9780,  8.0955,  7.0192,
        12.8787, 13.6720, 11.1712, 11.1398,  8.6703,  8.7202,  8.9923,  9.9490,
        10.5202, 10.7688,  6.7730, 10.2640,  9.9610,  5.3460, 14.5042,  9.4257,
         5.2400, 11.3452, 11.7212,  9.3607])