In [33]:
import numpy as np
from scipy import spatial

In [64]:
# Import training data: the header row is skipped and every remaining row
# is parsed as comma-separated floats (three columns in the preview below).

# skip_header is a line count, so pass 1 rather than relying on True == 1.
train = np.genfromtxt('data/trn_data.csv', delimiter=',', skip_header=1)
# print() with a single argument gives the same output on Python 2 and 3.
print(train[:5])

[[  40.582 -119.79    84.929]
 [  41.983 -123.6     87.388]
 [  41.85  -123.97    36.259]
 [  42.07  -123.01    84.913]
 [  41.953 -121.58    66.445]]


In [79]:
# Sanity check: (number of training rows, number of columns).
train.shape

(414, 3)

In [63]:
# Import the locations at which predictions are wanted; the header row is
# skipped and every remaining row is parsed as comma-separated floats
# (two columns in the preview below).
# skip_header is a line count, so pass 1 rather than relying on True == 1.
test = np.genfromtxt('data/tst_locations.csv', delimiter=',', skip_header=1)
# print() with a single argument gives the same output on Python 2 and 3.
print(test[:5])

[[  41.988 -123.72 ]
 [  41.883 -124.13 ]
 [  41.833 -123.83 ]
 [  41.999 -121.7  ]
 [  41.928 -122.44 ]]


In [80]:
# Sanity check: (number of test locations, number of columns).
test.shape

(413, 2)

In [58]:
# Matrix of coordinates: every column of `train` except the last one.
# NOTE(review): the two columns look like latitude / longitude -- confirm
# against the data source.
X = train[:, :-1]
# print() with a single argument gives the same output on Python 2 and 3.
print(X[:5])

[[  40.582 -119.79 ]
 [  41.983 -123.6  ]
 [  41.85  -123.97 ]
 [  42.07  -123.01 ]
 [  41.953 -121.58 ]]


In [59]:
# Matrix of measured values: the last column of `train`.
# The slice -1: (rather than the index -1) keeps Y two-dimensional, i.e. a
# column vector with one row per training point.
Y = train[:, -1:]
# print() with a single argument gives the same output on Python 2 and 3.
print(Y[:5])

[[ 84.929]
 [ 87.388]
 [ 36.259]
 [ 84.913]
 [ 66.445]]


The covariance function (kernel) we will use is the widely used squared exponential:

$$
K_{SE}(x,x') = \exp\left(-\frac{d^2}{2l^2}\right)
$$

where $d = \lVert x - x' \rVert$ is the Euclidean distance between the two points and $l$ is the characteristic length-scale of the Gaussian process (we will determine this experimentally).

In [55]:
def covariance(x, y, l):
    """Squared-exponential covariance between two sets of points.

    Parameters
    ----------
    x : array-like, one point per row
    y : array-like, one point per row (same number of columns as ``x``)
    l : characteristic length-scale of the kernel

    Returns
    -------
    Array with one row per point in ``x`` and one column per point in ``y``;
    entry (i, j) equals ``exp(-d_ij**2 / (2 * l**2))`` where ``d_ij`` is the
    distance between ``x[i]`` and ``y[j]``.
    """
    pairwise = spatial.distance_matrix(x, y)
    two_l_sq = 2 * l * l
    exponent = -np.square(pairwise) / two_l_sq
    return np.exp(exponent)

In [56]:
# Sanity check on the first five training points with l = 5: the result
# should be symmetric with ones on the diagonal (zero distance to itself).
covariance(X[:5], X[:5], 5)

array([[ 1.        ,  0.71922717,  0.68276505,  0.77751614,  0.90332325],
       [ 0.71922717,  1.        ,  0.99691299,  0.99291186,  0.92161658],
       [ 0.68276505,  0.99691299,  1.        ,  0.98078697,  0.8918528 ],
       [ 0.77751614,  0.99291186,  0.98078697,  1.        ,  0.95966426],
       [ 0.90332325,  0.92161658,  0.8918528 ,  0.95966426,  1.        ]])

In [65]:
# Covariance of every training point with every other one, length-scale 5.
K = covariance(X,X,5)

In [74]:
# Should be square, with one row/column per training point.
K.shape

(414, 414)

In [77]:
# Cross-covariance between training points (rows) and test locations (columns).
covariance(X,test,5).shape

(414, 413)

In [78]:
# Re-check the test shape: its row count should equal the column count of
# the cross-covariance computed above.
test.shape

(413, 2)

In [67]:
# Noise term: the variance of the measured values on the diagonal of an
# n-by-n matrix (n = number of training points), zeros elsewhere.
np.var(Y)*np.eye(len(X))

array([[ 715.98674872,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,  715.98674872,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,  715.98674872, ...,    0.        ,
           0.        ,    0.        ],
       ..., 
       [   0.        ,    0.        ,    0.        , ...,  715.98674872,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
         715.98674872,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,  715.98674872]])

In [73]:
# Check that the noisy training covariance can be inverted and that the
# inverse has the same square shape as K.
np.linalg.inv(K+np.var(Y)*np.eye(len(X))).shape

(414, 414)

In [97]:
def predictive_mean(x, x_test, y, l, noise_var=None):
    """Predictive mean of a zero-mean Gaussian process at ``x_test``.

    Evaluates ``K(x_test, x) . (K(x, x) + noise_var * I)^-1 . y`` using the
    kernel implemented by ``covariance``.

    Parameters
    ----------
    x : array-like, training locations, one point per row
    x_test : array-like, locations to predict at, one point per row
    y : array-like, training targets, one row per training point
    l : characteristic length-scale passed on to ``covariance``
    noise_var : float, optional
        Observation-noise variance added to the diagonal of ``K(x, x)``.
        Defaults to ``np.var(y)``, the value this function always used.
        NOTE(review): the kernel has unit amplitude, so a noise variance as
        large as ``np.var(y)`` presumably shrinks predictions strongly
        towards zero -- confirm that this default is intended.

    Returns
    -------
    Array of predicted means, one row per point in ``x_test``.
    """
    if noise_var is None:
        noise_var = np.var(y)

    K_xtest_x = covariance(x_test, x, l)
    K_noisy = covariance(x, x, l) + noise_var * np.eye(len(x))

    # Solve the linear system instead of forming the explicit inverse:
    # same result mathematically, but cheaper and numerically more stable.
    weights = np.linalg.solve(K_noisy, y)

    return K_xtest_x.dot(weights)

In [101]:
# Predicted mean at every test location, using length-scale l = 3.
# NOTE(review): this displays the whole prediction array; consider slicing
# (e.g. [:5]) to keep the notebook output short.
predictive_mean(X, test, Y, 3)

array([[  9.41723824],
       [  8.70858923],
       [  9.51520732],
       [ 12.47291387],
       [ 11.94907532],
       [ 10.5234409 ],
       [ 12.41591716],
       [ 11.40199677],
       [ 13.26123846],
       [ 12.05041584],
       [ 13.33290745],
       [ 13.61729011],
       [ 13.78219311],
       [ 13.8185706 ],
       [ 13.81228293],
       [ 11.52629376],
       [ 14.56004661],
       [ 13.41970696],
       [ 14.47950623],
       [ 14.46143979],
       [ 14.48446353],
       [ 14.39455684],
       [ 13.47014011],
       [ 11.35978247],
       [ 12.96305892],
       [ 13.63185855],
       [ 12.95511591],
       [ 12.5118485 ],
       [ 12.42852501],
       [ 15.14559225],
       [ 15.96041838],
       [ 14.53163704],
       [ 15.89325798],
       [ 10.88695767],
       [ 15.36302522],
       [ 14.72799484],
       [ 15.23160367],
       [ 15.13034814],
       [ 16.45135523],
       [ 15.90758053],
       [ 16.8072271 ],
       [ 15.81942598],
       [ 15.71266013],
       [ 16