# Model Selection

### Import libraries

In [1]:
from sklearn.gaussian_process.kernels import *
from sklearn.gaussian_process import GaussianProcessRegressor
from scipy.interpolate import griddata
from scipy.stats import norm
import numpy as np
import pandas as pd

### Read data

In [2]:
# Load the training inputs (X) and targets (y) from the CSV files that sit
# next to the notebook, converting each table to a NumPy array.
X, y = (
    pd.read_csv(csv_path).to_numpy()
    for csv_path in ("./train_x.csv", "./train_y.csv")
)

### Interpolate data onto a reduced regular grid

In [3]:
# Resample the training data onto a regular 50 x 50 grid.
#
# NOTE(review): 0.9988 is presumably the largest coordinate present in the
# training inputs -- confirm against X before reusing this cell on other data.
GRID_UPPER = 0.9988
GRID_STEPS = 50j  # imaginary step: np.mgrid returns 50 points, endpoints included

# Node coordinates of the grid over [0, GRID_UPPER] x [0, GRID_UPPER].
grid_x, grid_y = np.mgrid[0:GRID_UPPER:GRID_STEPS, 0:GRID_UPPER:GRID_STEPS]

# Each grid node takes the target value of its nearest training point.
grid_z0 = griddata(X, y, (grid_x, grid_y), method='nearest')

# Flatten to one row per grid node. Both ravel() calls walk the grid in the
# same order, so row i of `coor` belongs to row i of `interpolated_data_y`
# (assumes y holds a single target column -- TODO confirm).
interpolated_data_y = pd.DataFrame(grid_z0.ravel(), columns = ['pm25'])
# column_stack builds the (n_nodes, 2) coordinate array directly instead of
# going through a Python list of tuples.
coor = np.column_stack((grid_x.ravel(), grid_y.ravel()))

### Hyperparameter Tuning

In [4]:
# Candidate covariance functions to compare, each created with its default
# hyperparameters (the Matern kernels differ only in `nu`).
# NOTE(review): only the last candidate includes a WhiteKernel noise term, so
# the candidates may not be directly comparable -- confirm this is intended.
kernels = [
    RBF(),
    *(Matern(nu=nu) for nu in (0.5, 1.5, 2.5)),
    RationalQuadratic(),
    ExpSineSquared(),
    DotProduct() + WhiteKernel(),
]

In [12]:
# Fit one Gaussian-process regressor per candidate kernel on the gridded data
# and print the optimised kernel next to its log marginal likelihood, so the
# candidates can be compared.
# The loop names `kernel` and `gpc` stay in the notebook namespace after the
# loop finishes, so they are kept as they are.
grid_targets = interpolated_data_y.to_numpy()  # same array for every fit
for kernel in kernels:
    gpc = GaussianProcessRegressor(
        kernel=kernel,
        alpha=0.1,
        n_restarts_optimizer=100,  # each restart re-runs the hyperparameter optimiser
        random_state=0,
    )
    gpc.fit(coor, grid_targets)
    print(gpc.kernel_, ' : ', gpc.log_marginal_likelihood())

RBF(length_scale=0.0224)  :  -230199.7803302184


  K_gradient = K[..., np.newaxis] * D \


Matern(length_scale=0.0402, nu=0.5)  :  -147630.3189565221
Matern(length_scale=0.0297, nu=1.5)  :  -174273.92580377776
Matern(length_scale=0.0269, nu=2.5)  :  -188743.0522545688
RationalQuadratic(alpha=0.528, length_scale=0.0119)  :  -120489.31124962137
ExpSineSquared(length_scale=0.00612, periodicity=23)  :  -230197.91099507353
DotProduct(sigma_0=18.2) + WhiteKernel(noise_level=223)  :  -10346.768229266689
