Evaluate the Gaussian Process with only location input.

In [1]:
import torch
import gpytorch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
from tqdm import tqdm

from utils import average_hour

# Load Data

In [4]:
data_dir = "../InterpolationBaseline/data/Oct0123_Jan3024/"
data_files = [file for file in os.listdir(data_dir) if file.endswith(".csv")]
data = []
for i, file in enumerate(data_files):
    df = pd.read_csv(data_dir + file)
    df = average_hour(df)

    # remove sensors with missing data
    if len(df) < 2928:
        print("File{}: {} contains missing hours".format(i, file))
        continue

    # remove sensors with outliers
    if df["pm25"].max() > 500:
        print("File{}: {} contains outliers".format(i, file))
        continue

    data.append(df.loc[:, ["pm25", "longitude", "latitude"]])

data = np.array(data).transpose(1, 0, 2)
X = data[:, :, 1:]
Y = data[:, :, 0]
print(X.shape, Y.shape)

File1: 6kzhfU9xTKCUVJMz492l2g.csv contains outliers
File8: OfAvTbS1SiOjQo4WKSAP9g.csv contains missing hours
File17: wSo2iRgjT36eWC4a2joWZg.csv contains outliers
File19: jDYxIP2JQL2br5aTIAR7JQ.csv contains outliers
File21: 4XEJFVFOS761cvyEjOYf0g.csv contains outliers
File22: 3BAGEmnnQ2K4zF49Dkkoxg.csv contains missing hours
File26: 6nBLCf6WT06TOuUExPkBtA.csv contains missing hours
File36: R2ebpAblQHylOjteA-2hlQ.csv contains missing hours
File43: JQ1px-xqQx-xKh3Oa5h9nA.csv contains missing hours
File50: kyRUtBOTTaK7V_-dxOJTwg.csv contains outliers
(2928, 41, 2) (2928, 41)


# Construct GP Model

In [5]:
class GlobalKernel(gpytorch.kernels.Kernel):
    def __init__(self, matern_ard=None, **kwargs):
        super(GlobalKernel, self).__init__(**kwargs)
        if matern_ard is not None:
            self.maternkernel = gpytorch.kernels.MaternKernel(nu=0.5, ard_num_dims=matern_ard)
        else:
            self.maternkernel = gpytorch.kernels.MaternKernel(nu=0.5)
        
        self.scalekernel = gpytorch.kernels.ScaleKernel(self.maternkernel)

    def forward(self, x1, x2, **params):
        return self.scalekernel(x1, x2, **params)
    
class AirGP(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, matern_ard=None):
        super(AirGP, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = GlobalKernel(matern_ard=matern_ard)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# LOOCV

In [9]:
n_steps, n_stations, n_features = X.shape
print(n_steps, n_stations, n_features)

2928 41 2


In [12]:
Y_true_all = []
Y_pred_all = []
for t in tqdm(np.arange(n_steps)):
    try:
        np.random.seed(42)

        # leave one out split
        i = np.random.randint(n_stations)
        X_train = torch.from_numpy(np.concatenate([X[t, :i], X[t, i+1:]], axis=0)).float()
        Y_train = torch.from_numpy(np.concatenate([Y[t, :i], Y[t, i+1:]], axis=0)).float()
        X_test = torch.from_numpy(X[t, i:i+1]).float()
        Y_test = torch.from_numpy(Y[t, i:i+1]).float()

        # prepare training
        likelihood = gpytorch.likelihoods.GaussianLikelihood()
        model = AirGP(X_train, Y_train, likelihood, matern_ard=n_features)
        training_iter = 10000
        model.train()
        likelihood.train()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
        
        # training
        for iter in range(training_iter):
            model.train()
            likelihood.train()
            optimizer.zero_grad()
            output = model(X_train)
            loss = -mll(output, Y_train)
            loss.backward()
            optimizer.step()

        # evaluation
        model.eval()
        likelihood.eval()
        Y_pred = model(X_test)
        Y_pred = Y_pred.mean.detach()
        Y_true_all.append(Y_test)
        Y_pred_all.append(Y_pred)
    except Exception as e:
        print("Failed at step {}".format(t))
        continue

Y_true_all = torch.cat(Y_true_all, dim=0).numpy()
Y_pred_all = torch.cat(Y_pred_all, dim=0).numpy()

  0%|          | 5/2928 [00:39<5:40:18,  6.99s/it]

Failed at step 4


  3%|▎         | 93/2928 [14:20<7:00:03,  8.89s/it]

Failed at step 92


  4%|▍         | 123/2928 [18:48<5:19:37,  6.84s/it]

Failed at step 122


  5%|▌         | 147/2928 [22:23<5:47:54,  7.51s/it]

Failed at step 146


  6%|▌         | 175/2928 [26:38<6:19:49,  8.28s/it]

Failed at step 174


  9%|▉         | 275/2928 [42:11<6:43:53,  9.13s/it]

Failed at step 274


 12%|█▏        | 337/2928 [51:39<4:58:16,  6.91s/it]

Failed at step 336


 12%|█▏        | 363/2928 [55:37<5:10:18,  7.26s/it]

Failed at step 362


 12%|█▏        | 364/2928 [55:40<4:08:33,  5.82s/it]

Failed at step 363


 13%|█▎        | 385/2928 [58:45<4:49:13,  6.82s/it]

Failed at step 384


 13%|█▎        | 386/2928 [58:52<4:46:57,  6.77s/it]

Failed at step 385


 14%|█▎        | 399/2928 [1:00:45<5:01:07,  7.14s/it]

Failed at step 398


 14%|█▍        | 407/2928 [1:01:55<5:28:23,  7.82s/it]

Failed at step 406


 16%|█▋        | 483/2928 [1:13:26<4:49:22,  7.10s/it]

Failed at step 482


 18%|█▊        | 518/2928 [1:25:12<60:25:59, 90.27s/it]

Failed at step 517


 18%|█▊        | 525/2928 [1:30:36<19:38:29, 29.43s/it] 

Failed at step 524


 25%|██▌       | 746/2928 [2:23:56<4:17:23,  7.08s/it]   

Failed at step 745


 30%|███       | 888/2928 [2:45:17<3:53:40,  6.87s/it]

Failed at step 887


 31%|███       | 894/2928 [2:46:04<3:46:20,  6.68s/it]

Failed at step 893


 31%|███       | 895/2928 [2:46:05<2:53:59,  5.13s/it]

Failed at step 894


 32%|███▏      | 942/2928 [2:53:19<4:39:49,  8.45s/it]

Failed at step 941


 34%|███▎      | 986/2928 [3:00:00<4:18:24,  7.98s/it]

Failed at step 985


 36%|███▌      | 1041/2928 [3:08:25<4:35:00,  8.74s/it]

Failed at step 1040


 36%|███▌      | 1058/2928 [3:10:53<3:45:04,  7.22s/it]

Failed at step 1057


 36%|███▌      | 1059/2928 [3:10:57<3:12:07,  6.17s/it]

Failed at step 1058


 36%|███▋      | 1064/2928 [3:11:40<4:04:58,  7.89s/it]

Failed at step 1063


 37%|███▋      | 1076/2928 [3:13:21<3:28:57,  6.77s/it]

Failed at step 1075


 37%|███▋      | 1082/2928 [3:14:14<4:17:30,  8.37s/it]

Failed at step 1081


 38%|███▊      | 1106/2928 [3:17:49<4:09:35,  8.22s/it]

Failed at step 1105


 38%|███▊      | 1111/2928 [3:18:32<4:13:15,  8.36s/it]

Failed at step 1110


 39%|███▊      | 1129/2928 [3:21:08<3:21:42,  6.73s/it]

Failed at step 1128


 40%|███▉      | 1158/2928 [3:25:21<3:10:33,  6.46s/it]

Failed at step 1157


 41%|████      | 1195/2928 [3:30:47<3:05:22,  6.42s/it]

Failed at step 1194


 42%|████▏     | 1226/2928 [3:35:25<3:52:06,  8.18s/it]

Failed at step 1225


 42%|████▏     | 1227/2928 [3:35:27<2:59:44,  6.34s/it]

Failed at step 1226


 42%|████▏     | 1228/2928 [3:35:33<2:52:12,  6.08s/it]

Failed at step 1227


 42%|████▏     | 1232/2928 [3:36:06<3:33:04,  7.54s/it]

Failed at step 1231


 44%|████▍     | 1295/2928 [3:45:32<3:23:13,  7.47s/it]

Failed at step 1294


 44%|████▍     | 1297/2928 [3:45:43<2:45:12,  6.08s/it]

Failed at step 1296


 47%|████▋     | 1389/2928 [3:59:29<2:58:49,  6.97s/it]

Failed at step 1388


 48%|████▊     | 1392/2928 [3:59:48<2:31:47,  5.93s/it]

Failed at step 1391


 48%|████▊     | 1420/2928 [4:04:02<3:48:08,  9.08s/it]

Failed at step 1419


 49%|████▉     | 1440/2928 [4:06:56<2:56:20,  7.11s/it]

Failed at step 1439


 50%|█████     | 1475/2928 [4:12:11<3:18:15,  8.19s/it]

Failed at step 1474


 51%|█████     | 1486/2928 [4:13:45<2:51:46,  7.15s/it]

Failed at step 1485


 51%|█████     | 1487/2928 [4:13:46<2:07:47,  5.32s/it]

Failed at step 1486


 53%|█████▎    | 1561/2928 [4:24:51<2:41:24,  7.08s/it]

Failed at step 1560


 56%|█████▌    | 1640/2928 [4:36:40<2:24:27,  6.73s/it]

Failed at step 1639


 65%|██████▌   | 1913/2928 [5:18:04<2:04:52,  7.38s/it]

Failed at step 1912


 65%|██████▌   | 1914/2928 [5:18:08<1:47:33,  6.36s/it]

Failed at step 1913


 65%|██████▌   | 1915/2928 [5:18:11<1:28:12,  5.22s/it]

Failed at step 1914


 65%|██████▌   | 1917/2928 [5:18:23<1:29:53,  5.33s/it]

Failed at step 1916


 66%|██████▌   | 1918/2928 [5:18:25<1:13:55,  4.39s/it]

Failed at step 1917


 66%|██████▌   | 1924/2928 [5:19:16<2:04:54,  7.46s/it]

Failed at step 1923


 66%|██████▌   | 1925/2928 [5:19:18<1:39:50,  5.97s/it]

Failed at step 1924


 66%|██████▌   | 1926/2928 [5:19:21<1:22:28,  4.94s/it]

Failed at step 1925


 66%|██████▌   | 1927/2928 [5:19:24<1:12:35,  4.35s/it]

Failed at step 1926


 66%|██████▌   | 1928/2928 [5:19:28<1:11:33,  4.29s/it]

Failed at step 1927


 66%|██████▌   | 1929/2928 [5:19:32<1:08:41,  4.13s/it]

Failed at step 1928


 66%|██████▌   | 1930/2928 [5:19:35<1:01:20,  3.69s/it]

Failed at step 1929


 66%|██████▌   | 1931/2928 [5:19:37<54:41,  3.29s/it]  

Failed at step 1930


 66%|██████▌   | 1932/2928 [5:19:39<50:20,  3.03s/it]

Failed at step 1931


 66%|██████▌   | 1933/2928 [5:19:42<46:59,  2.83s/it]

Failed at step 1932


 66%|██████▌   | 1934/2928 [5:19:45<47:13,  2.85s/it]

Failed at step 1933


 66%|██████▌   | 1935/2928 [5:19:47<46:40,  2.82s/it]

Failed at step 1934


 66%|██████▌   | 1936/2928 [5:19:54<1:06:35,  4.03s/it]

Failed at step 1935


 66%|██████▌   | 1937/2928 [5:19:58<1:04:01,  3.88s/it]

Failed at step 1936


 66%|██████▌   | 1938/2928 [5:20:00<55:35,  3.37s/it]  

Failed at step 1937


 66%|██████▌   | 1939/2928 [5:20:02<49:27,  3.00s/it]

Failed at step 1938


 66%|██████▋   | 1947/2928 [5:21:07<1:44:26,  6.39s/it]

Failed at step 1946


 67%|██████▋   | 1948/2928 [5:21:08<1:19:01,  4.84s/it]

Failed at step 1947


 67%|██████▋   | 1949/2928 [5:21:09<1:01:23,  3.76s/it]

Failed at step 1948


 67%|██████▋   | 1950/2928 [5:21:10<46:17,  2.84s/it]  

Failed at step 1949


 67%|██████▋   | 1952/2928 [5:21:20<57:13,  3.52s/it]  

Failed at step 1951


 67%|██████▋   | 1954/2928 [5:21:31<1:11:03,  4.38s/it]

Failed at step 1953


 74%|███████▍  | 2170/2928 [5:54:10<1:21:40,  6.46s/it]

Failed at step 2169


 74%|███████▍  | 2172/2928 [5:54:19<1:05:33,  5.20s/it]

Failed at step 2171


 74%|███████▍  | 2174/2928 [5:54:29<57:18,  4.56s/it]  

Failed at step 2173


 74%|███████▍  | 2175/2928 [5:54:29<41:47,  3.33s/it]

Failed at step 2174


 74%|███████▍  | 2178/2928 [5:54:50<1:02:44,  5.02s/it]

Failed at step 2177


 74%|███████▍  | 2181/2928 [5:55:09<1:05:16,  5.24s/it]

Failed at step 2180


 75%|███████▍  | 2183/2928 [5:55:20<1:05:22,  5.26s/it]

Failed at step 2182


 77%|███████▋  | 2242/2928 [6:04:13<1:32:59,  8.13s/it]

Failed at step 2241


 77%|███████▋  | 2244/2928 [6:04:29<1:28:54,  7.80s/it]

Failed at step 2243


 81%|████████  | 2376/2928 [6:24:24<1:05:03,  7.07s/it]

Failed at step 2375


 81%|████████▏ | 2379/2928 [6:24:45<1:00:03,  6.56s/it]

Failed at step 2378


 82%|████████▏ | 2398/2928 [6:27:38<1:20:48,  9.15s/it]

Failed at step 2397


 82%|████████▏ | 2399/2928 [6:27:46<1:17:47,  8.82s/it]

Failed at step 2398


 82%|████████▏ | 2400/2928 [6:27:50<1:07:02,  7.62s/it]

Failed at step 2399


 83%|████████▎ | 2424/2928 [6:31:25<1:05:48,  7.83s/it]

Failed at step 2423


 84%|████████▎ | 2449/2928 [6:35:04<53:04,  6.65s/it]  

Failed at step 2448


 84%|████████▎ | 2450/2928 [6:35:05<39:10,  4.92s/it]

Failed at step 2449


 84%|████████▎ | 2451/2928 [6:35:06<28:48,  3.62s/it]

Failed at step 2450


 84%|████████▍ | 2458/2928 [6:36:01<47:40,  6.09s/it]  

Failed at step 2457


 84%|████████▍ | 2459/2928 [6:36:02<35:12,  4.50s/it]

Failed at step 2458


 84%|████████▍ | 2460/2928 [6:36:06<35:01,  4.49s/it]

Failed at step 2459


 84%|████████▍ | 2461/2928 [6:36:07<25:56,  3.33s/it]

Failed at step 2460


 84%|████████▍ | 2462/2928 [6:36:08<20:34,  2.65s/it]

Failed at step 2461


 84%|████████▍ | 2463/2928 [6:36:09<17:25,  2.25s/it]

Failed at step 2462


 84%|████████▍ | 2464/2928 [6:36:11<16:56,  2.19s/it]

Failed at step 2463


 84%|████████▍ | 2465/2928 [6:36:12<14:10,  1.84s/it]

Failed at step 2464


 84%|████████▍ | 2474/2928 [6:37:26<48:08,  6.36s/it]  

Failed at step 2473


 85%|████████▍ | 2475/2928 [6:37:31<43:26,  5.75s/it]

Failed at step 2474


 85%|████████▌ | 2496/2928 [6:40:40<1:02:58,  8.75s/it]

Failed at step 2495


 85%|████████▌ | 2498/2928 [6:40:54<53:32,  7.47s/it]  

Failed at step 2497


 92%|█████████▏| 2682/2928 [7:09:02<27:25,  6.69s/it]  

Failed at step 2681


 92%|█████████▏| 2684/2928 [7:09:12<22:14,  5.47s/it]

Failed at step 2683


 92%|█████████▏| 2685/2928 [7:09:13<16:33,  4.09s/it]

Failed at step 2684


 92%|█████████▏| 2686/2928 [7:09:13<12:39,  3.14s/it]

Failed at step 2685


 92%|█████████▏| 2687/2928 [7:09:14<10:01,  2.50s/it]

Failed at step 2686


 92%|█████████▏| 2688/2928 [7:09:16<08:21,  2.09s/it]

Failed at step 2687


 92%|█████████▏| 2689/2928 [7:09:17<08:00,  2.01s/it]

Failed at step 2688


 92%|█████████▏| 2691/2928 [7:09:32<17:21,  4.39s/it]

Failed at step 2690


 92%|█████████▏| 2692/2928 [7:09:34<15:19,  3.90s/it]

Failed at step 2691


 92%|█████████▏| 2693/2928 [7:09:37<13:18,  3.40s/it]

Failed at step 2692


 92%|█████████▏| 2696/2928 [7:09:58<20:40,  5.35s/it]

Failed at step 2695


 92%|█████████▏| 2702/2928 [7:10:46<25:02,  6.65s/it]

Failed at step 2701


 94%|█████████▎| 2741/2928 [7:16:36<22:14,  7.14s/it]

Failed at step 2740


 94%|█████████▎| 2742/2928 [7:16:42<20:50,  6.72s/it]

Failed at step 2741


 94%|█████████▍| 2745/2928 [7:17:02<18:27,  6.05s/it]

Failed at step 2744


 96%|█████████▌| 2816/2928 [7:27:43<13:30,  7.24s/it]

Failed at step 2815


100%|██████████| 2928/2928 [7:44:46<00:00,  9.52s/it]


In [13]:
rmse = np.sqrt(np.mean((Y_true_all - Y_pred_all)**2))
cvrmse = rmse / Y_true_all.mean()
mae = np.mean(np.abs(Y_true_all - Y_pred_all))
r2 = 1 - np.sum((Y_true_all - Y_pred_all)**2) / np.sum((Y_true_all - Y_true_all.mean())**2)
print("RMSE: {:.2f}".format(rmse))
print("CVRMSE: {:.2f}".format(cvrmse))
print("MAE: {:.2f}".format(mae))
print("R2: {:.2f}".format(r2))

RMSE: 7.68
CVRMSE: 0.41
MAE: 5.28
R2: 0.59
