# Model Selection

### Import libraries

In [23]:
import os
import time
import typing

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.gaussian_process.kernels
from matplotlib import cm
from scipy.interpolate import griddata
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
    RBF,
    DotProduct,
    ExpSineSquared,
    Matern,
    RationalQuadratic,
    WhiteKernel,
)
from sklearn.kernel_approximation import Nystroem
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

### Read data

In [3]:
# Load the training inputs and targets from CSV, then convert both frames
# to plain NumPy arrays for the interpolation and GP fitting cells below.
train_x_frame = pd.read_csv("./train_x.csv")
train_y_frame = pd.read_csv("./train_y.csv")
X, y = train_x_frame.to_numpy(), train_y_frame.to_numpy()

In [9]:
# Regular 50 x 50 mesh over [0, 0.9988] on both axes. A complex step in
# np.mgrid means "this many points, endpoint included" rather than a stride.
axis_span = slice(0, 0.9988, 50j)
grid_x, grid_y = np.mgrid[axis_span, axis_span]

In [10]:
# Resample the targets onto the regular mesh: each mesh node takes the value
# of its nearest training point (method='nearest').
grid_z0 = griddata(
    points=X,
    values=y,
    xi=(grid_x, grid_y),
    method='nearest',
)
# Flatten the gridded values into a single-column frame named 'pm25'.
interpolated_data_y = pd.DataFrame({'pm25': grid_z0.ravel()})

In [11]:
# One row per mesh node: column 0 comes from grid_x, column 1 from grid_y,
# both flattened in the same (row-major) order.
coor = np.column_stack((grid_x.ravel(), grid_y.ravel()))

In [12]:
# Wrap the mesh coordinates in a frame with named 'lon' / 'lat' columns.
interpolated_data_X = pd.DataFrame({'lon': coor[:, 0], 'lat': coor[:, 1]})

In [13]:
# Persist the interpolated inputs and targets as CSV, without the index column.
csv_exports = (
    (interpolated_data_X, 'interpolated_X_2500.csv'),
    (interpolated_data_y, 'interpolated_y_2500.csv'),
)
for export_frame, export_path in csv_exports:
    export_frame.to_csv(export_path, index=False)

### Toy example for RBF

In [14]:
# Toy run: fit a GP regressor with a default RBF kernel on the interpolated
# grid and measure how long the hyperparameter search takes.
kernel = RBF()
start = time.time()
gpc = GaussianProcessRegressor(kernel=kernel,
                               random_state=0,
                               n_restarts_optimizer=100).fit(coor, interpolated_data_y.to_numpy())
print(gpc.kernel_)
# The closing parenthesis was missing here, which made the cell a syntax error.
print(gpc.log_marginal_likelihood())
# `start` was recorded but never used; report the wall-clock fitting time in seconds.
print(time.time() - start)

75.33604407310486


In [25]:
# Compare candidate kernels on the interpolated grid. For each kernel, fit a
# GP regressor (100 optimizer restarts, fixed random_state) and print the
# optimised kernel followed by its log marginal likelihood. The list order
# fixes the order of the printed results.
candidate_kernels = [
    RBF(),
    Matern(nu=0.5),
    Matern(nu=1.5),
    Matern(nu=2.5),
    RationalQuadratic(),
    ExpSineSquared(),
    DotProduct() + WhiteKernel(),
]

# Convert the targets once instead of once per fit.
grid_targets = interpolated_data_y.to_numpy()

# `kernel` and `gpc` are intentionally the loop variables so that, after the
# loop, they still refer to the last candidate (DotProduct + WhiteKernel),
# exactly as they did when the fits were written out one by one.
for kernel in candidate_kernels:
    gpc = GaussianProcessRegressor(kernel=kernel,
                                   random_state=0,
                                   n_restarts_optimizer=100).fit(coor, grid_targets)
    print(gpc.kernel_)
    print(gpc.log_marginal_likelihood())

RBF(length_scale=0.0168)
-305565.38943646516


  K_gradient = K[..., np.newaxis] * D \


Matern(length_scale=0.036, nu=0.5)
-165616.9917406278
Matern(length_scale=0.0253, nu=1.5)
-205393.44635639447
Matern(length_scale=0.0223, nu=2.5)
-228279.36014855502
RationalQuadratic(alpha=0.522, length_scale=0.0106)
-131886.14096834682
ExpSineSquared(length_scale=0.000532, periodicity=198)
-305565.3582388254
DotProduct(sigma_0=18.2) + WhiteKernel(noise_level=223)
-10346.768229266692


In [None]:
# Fit on the original training data (X, y) with a DotProduct + WhiteKernel
# kernel, using sigma_0=18.2 and noise_level=223 as initial hyperparameters,
# then report the fitted kernel and its log marginal likelihood.
kernel = DotProduct(sigma_0=18.2) + WhiteKernel(noise_level=223)
gpc = GaussianProcessRegressor(kernel=kernel, random_state=0)
gpc.fit(X, y)
print(gpc.kernel_, gpc.log_marginal_likelihood(), sep="\n")