In [1]:
import math
import tqdm
import torch
import gpytorch
import numpy as np
import pandas as pd
import sys
import os
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
np.random.seed(0)
torch.manual_seed(0)

model_name = sys.argv[0].split('/')[-1].replace('.py','')
path = '../../../data_and_results/u-air/production/pm25_beijing_best36/quadratic/'
fold = '0'
for f_id in [str(i).zfill(5) for i in range(10)]:
    
    trn_X = torch.tensor(np.load(path+'data/fold_'+fold+'/train/X/'+f_id+'.npz')['arr_0'], dtype=torch.float32)
    trn_y = torch.tensor(np.load(path+'data/fold_'+fold+'/train/y/'+f_id+'.npz')['arr_0'], dtype=torch.float32)
    tst_X = torch.tensor(np.load(path+'data/fold_'+fold+'/test/X/'+f_id+'.npz')['arr_0'], dtype=torch.float32)
    tst_y = torch.tensor(np.load(path+'data/fold_'+fold+'/test/y/'+f_id+'.npz')['arr_0'], dtype=torch.float32)
    data_dim = trn_X.size(-1)
    n_dim = 2

    class LargeFeatureExtractor(torch.nn.Sequential):
        def __init__(self):
            super(LargeFeatureExtractor, self).__init__()
            self.add_module('linear1', torch.nn.Linear(data_dim, 64))
            self.add_module('relu1', torch.nn.ReLU())
            self.add_module('linear2', torch.nn.Linear(64, 32))
            self.add_module('relu2', torch.nn.ReLU())
            self.add_module('linear3', torch.nn.Linear(32, 16))
            self.add_module('relu3', torch.nn.ReLU())
            self.add_module('linear4', torch.nn.Linear(16, n_dim))

    feature_extractor = LargeFeatureExtractor()

    scaler = pd.read_pickle(path+'data/fold_'+fold+'/scaler/'+f_id+'.pickle')

    class GPRegressionModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood):
            super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
            self.mean_module = gpytorch.means.ConstantMean()
#             self.covar_module = gpytorch.kernels.GridInterpolationKernel(
#                 gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=n_dim)),
#                 num_dims=n_dim, grid_size=100
#             )
#             print(gpytorch.utils.grid.choose_grid_size(train_x))
            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=n_dim))
            self.feature_extractor = feature_extractor

        def forward(self, x):
            # We're first putting our data through a deep net (feature extractor)
            # We're also scaling the features so that they're nice values
            projected_x = self.feature_extractor(x)
#           Min-Max scaling
#             projected_x = projected_x - projected_x.min(0)[0]
#             projected_x = 2 * (projected_x / projected_x.max(0)[0]) - 1
#           Standard scaling
#             print('before', type(projected_x), projected_x.shape)
#             print(projected_x.mean(axis=0))
            projected_x = projected_x - projected_x.mean(axis=0)
            projected_x = projected_x/projected_x.std(axis=0)
#             print('after', type(projected_x), projected_x.shape)
            mean_x = self.mean_module(projected_x)
            covar_x = self.covar_module(projected_x)
            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPRegressionModel(trn_X, trn_y.ravel(), likelihood)
    training_iterations = 200

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam([
        {'params': model.feature_extractor.parameters()},
        {'params': model.covar_module.parameters()},
        {'params': model.mean_module.parameters()},
        {'params': model.likelihood.parameters()},
    ], lr=0.01)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    def train():
        iterator = tqdm.notebook.tqdm(range(training_iterations))
        losses = []
        for i in iterator:
            # Zero backprop gradients
            optimizer.zero_grad()
            # Get output from model
            output = model(trn_X)
    #         print(output.shape, type(output), output)
            # Calc loss and backprop derivatives
            loss = -mll(output, trn_y.ravel())
#             print(1 if loss>np.inf else 0)
            losses.append(loss)
            loss.backward()
            iterator.set_postfix(loss=loss.item())
            optimizer.step()
        return losses

    losses = train()
    print(tst_X.shape, trn_X.shape)    
    model.eval()
    likelihood.eval()
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds = model(tst_X).mean.numpy()

    pred_y = scaler.inverse_transform(preds)
    print(mean_absolute_error(tst_y.ravel(), pred_y))
    # if not os.path.exists(path+'results/'+model_name+'/fold_'+fold+'/'):
    #     os.makedirs(path+'results/'+model_name+'/fold_'+fold+'/')

    # np.savez_compressed(path+'results/'+model_name+'/fold_'+fold+'/'+f_id+'.npz', pred_y)
    # pd.to_pickle(model.param_array, path+'results/'+model_name+'/fold_'+fold+'/'+f_id+'.model')
#     plt.plot(losses)


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
37.318


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
21.92962


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
13.00882


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
16.357058


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
24.835165


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
12.250585


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
18.062805


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
11.551099


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
30.56658


  0%|          | 0/200 [00:00<?, ?it/s]

torch.Size([6, 6]) torch.Size([30, 6])
46.886597


In [5]:
opath = '../../../data_and_results/u-air/production/pm25_beijing_best36/quadratic/results/gp_rbf/fold_0/'
tpath = '../../../data_and_results/u-air/production/pm25_beijing_best36/quadratic/data/fold_0/'
for i in [str(i).zfill(5) for i in range(10)]:
    pred = np.load(opath+i+'.npz')['arr_0']
    tst = np.load(tpath+'test/y/'+i+'.npz')['arr_0']
    print(mean_absolute_error(tst, pred))

22.2453460301567
17.016735544217056
16.74793164514159
17.09705504922577
22.799159167430588
20.258048564615247
21.241996485071795
30.25906078389399
44.77660282120676
39.258164003554825


In [2]:
pred_y, tst_y

(array([216.42017, 218.27953, 207.00104, 251.74893, 244.90967, 348.6796 ],
       dtype=float32),
 tensor([[377.],
         [224.],
         [259.],
         [218.],
         [228.],
         [306.]]))