In [2]:
 '''
 Train and run Gaussian Processes. Evaluate and compare the predictions using at least two different kernels via 10-fold cross-validation with a suitable error measure (we recommend negative log predictive density as it takes the predictive uncertainty into account).

 Prakrit Shrestha | 2018
 Crime Data Analysis - Running a basic GP


 '''
from common import *
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
import matplotlib.pyplot as plt



In [3]:
# Load the pre-processed neighbourhood crime table as a plain NumPy array.
# `DataFrame.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0;
# `.values` yields the same array and works on both old and new pandas.
data = pd.read_csv("../raw_data/crime_processed_neighbourhood.csv").values

# Column 9 is held out as the target Y; columns 0-8 and 10 form the
# feature matrix X.
X = data[:, [0,1,2,3,4,5,6,7,8,10]]
Y = data[:, 9]

In [7]:
# Covariance function: a constant (amplitude) factor multiplied by an RBF.
# Each factor is given an initial value and the (lower, upper) bounds within
# which the optimiser may tune it during fitting.
amplitude = C(constant_value=1.0, constant_value_bounds=(1e-3, 1e3))
smoothness = RBF(length_scale=10, length_scale_bounds=(1e-2, 1e2))
kernel = amplitude * smoothness

In [8]:
# Gaussian-process regressor built on the kernel defined above.
# n_restarts_optimizer=9 re-runs the hyperparameter optimiser from 9 extra
# randomly drawn starting points; random_state pins those draws so that
# repeated runs of the notebook produce the same fitted model.
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9,
                              random_state=0)

In [None]:
# Fit the GP to the full data set (features X, target Y). The fitted
# estimator is the cell's last expression, so the notebook displays it.
gp.fit(X=X, y=Y)

In [None]:
# Predictive mean and standard deviation for every row of the feature
# matrix. The original call passed a lowercase `x`, which is never defined
# in this notebook (NameError); the available inputs are the features X.
# NOTE(review): these are in-sample predictions; evaluate on held-out folds
# when comparing kernels.
y_pred, sigma = gp.predict(X, return_std=True)

In [None]:
# Plot observations, GP predictions and the 95% predictive band.
# X has several feature columns, so there is no single input axis to plot
# against; the sample (row) index is used on the horizontal axis instead.
sample_idx = np.arange(len(Y))
y_obs = np.asarray(Y, dtype=float)
y_hat = np.asarray(y_pred, dtype=float)
# 1.96 standard deviations on each side covers ~95% of a Gaussian.
half_width = 1.9600 * np.asarray(sigma, dtype=float)

fig, ax = plt.subplots()
ax.plot(sample_idx, y_obs, 'r.', markersize=10, label='Observations')
ax.plot(sample_idx, y_hat, 'b-', label='Prediction')
ax.fill_between(sample_idx, y_hat - half_width, y_hat + half_width,
                alpha=.5, facecolor='b', edgecolor='none',
                label='95% confidence interval')
ax.set_xlabel('Sample index')
ax.set_ylabel('Target (column 9)')
# No fixed y-limits: let matplotlib scale the axis to the data.
ax.legend(loc='upper left');