In [None]:
import init_notebook

from math import pi as PI
from math import sqrt
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.stats import norm


from src.models.gaussian_process.regression import GaussianProcessRegressor
from src.models.gaussian_process.binary_classification import GaussianProcessClassifier
from src.models.gaussian_process.kernels import RadialBasisFunction, Periodic, ComposeMultiply

from src.data.data_helpers import load_diabetes_sklearn

%load_ext autoreload
%autoreload 2

In [None]:
def train_gp_rbf(X, y, variance=1, length_scale=1, noise_variacne=0):
    """Fit a Gaussian-process regressor that uses an RBF kernel.

    Args:
        X: training inputs, handed unchanged to ``GaussianProcessRegressor.train``.
        y: training targets, handed unchanged to ``GaussianProcessRegressor.train``.
        variance: kernel variance; controls the variance of the prior.
        length_scale: kernel length; controls how quickly the fit "comes
            back" to the prior.
        noise_variacne: noise variance of the regressor; controls how tightly
            the GP sticks to the training data. (The spelling is part of the
            keyword interface used by callers.)

    Returns:
        The trained ``GaussianProcessRegressor``.
    """
    kernel = RadialBasisFunction(length=length_scale, variance=variance)
    regressor = GaussianProcessRegressor(kernel, noise_variance=noise_variacne)
    regressor.train(X, y)
    return regressor

def train_gp_periodic(X, y, variance=1, length_scale=1, period=1, noise_variacne=0):
    """Fit a Gaussian-process regressor that uses a periodic kernel.

    Args:
        X: training inputs, handed unchanged to ``GaussianProcessRegressor.train``.
        y: training targets, handed unchanged to ``GaussianProcessRegressor.train``.
        variance: kernel variance; controls the variance of the prior.
        length_scale: kernel length; controls how quickly the fit "comes
            back" to the prior.
        period: forwarded as the ``period`` argument of the ``Periodic`` kernel.
        noise_variacne: noise variance of the regressor; controls how tightly
            the GP sticks to the training data. (The spelling is part of the
            keyword interface used by callers.)

    Returns:
        The trained ``GaussianProcessRegressor``.
    """
    kernel = Periodic(period=period, length=length_scale, variance=variance)
    regressor = GaussianProcessRegressor(kernel, noise_variance=noise_variacne)
    regressor.train(X, y)
    return regressor

In [None]:
def plot_fit_example_1(X, y, *, variance=1, length_scale=1, noise_variacne=1, save_name=None):
    """Fit an RBF-kernel GP to (X, y) and plot its predictions on [0, 20].

    The figure shows the predicted curve, two lighter curves at
    ``predictions +/- covariances`` (the second output of
    ``gp.get_distribution``), and the training points as crosses.

    Args:
        X: training inputs passed to ``GaussianProcessRegressor.train``.
        y: training targets passed to ``GaussianProcessRegressor.train``.
        variance: ``variance`` argument of the RBF kernel.
        length_scale: ``length`` argument of the RBF kernel.
        noise_variacne: ``noise_variance`` argument of the regressor (the
            spelling is part of the keyword interface used by callers).
        save_name: when not ``None``, the figure is written to
            ``task_1/<save_name>.png``; the ``task_1`` directory is created
            if it does not exist yet.

    Returns:
        None. The figure is left open so the notebook can render it.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # does not bring in `os`.
    import os

    # Train GP
    rbf = RadialBasisFunction(variance=variance, length=length_scale)
    gp = GaussianProcessRegressor(rbf, noise_variance=noise_variacne)
    gp.train(X, y)

    # Evaluate the fitted GP on a fixed 100-point grid over [0, 20]
    X_test = np.linspace(0, 20, num=100).reshape(-1, 1)
    predictions, covariances = gp.get_distribution(X_test)

    # Make prediction figure and plot
    prediction_fig = plt.figure()
    pred_graph = prediction_fig.add_subplot(111)
    pred_graph.set_xlabel('x')
    pred_graph.set_ylabel('y')

    # Plot fitted function and the +/- covariances envelope around it
    pred_graph.plot(X_test, predictions, color='#ED5050')
    pred_graph.plot(X_test, predictions + covariances, color='#E3CFCF')
    pred_graph.plot(X_test, predictions - covariances, color='#E3CFCF')

    # Scatter data points
    pred_graph.scatter(X, y, marker='x')

    # Save figure
    if save_name is not None:
        # savefig does not create missing parent directories, so a fresh
        # checkout without task_1/ would otherwise fail here.
        os.makedirs('task_1', exist_ok=True)
        prediction_fig.savefig(f'task_1/{save_name}.png')

In [None]:
# Six hand-picked training points, stored as column vectors
X = np.array([[0.1], [2], [2.1], [5], [9], [12]])
y = np.array([[1.8], [2], [2.2], [1], [3], [3.4]])

# (variance, length_scale, noise_variance, save_bool)
configs = [
    (1, 1, 0, True),
    (2, 1, 0, True),
    (1, 2, 0, True),
    (1, 1, 0.007, True),
]

# One figure per configuration; it is written to disk only when save_bool is set
for v, l, n, s in configs:
    plot_fit_example_1(
        X, y,
        variance=v,
        length_scale=l,
        noise_variacne=n,
        save_name=f'v={v}-l={l}-n={n}' if s else None,
    )


In [None]:
from src.data.data_helpers import forrester_function, load_forrester

# Evaluate the Forrester function on a dense grid over [-5, 5] and draw it
X = np.linspace(-5, 5, num=200)
y = forrester_function(X)

forrester_fig, forrester_plot = plt.subplots()
forrester_plot.plot(X, y, color='#ED5050')
forrester_fig.tight_layout()

In [None]:
# Forrester train/test split used by the fits below.
# NOTE(review): begin/end presumably bound the sampled x-range and
# noise_variance=0 presumably means noise-free targets — confirm in load_forrester.
forrester_split = load_forrester(begin=-2.5, end=2.5, n_samples=50, noise_variance=0)
X_train, y_train, X_test, y_test = forrester_split

In [None]:
# RBF-kernel GP fitted to the Forrester training samples
gp = train_gp_rbf(X_train, y_train, variance=50, length_scale=5, noise_variacne=0)

# Query the fitted GP on a 250-point column grid over [-5, 5]
X_plot = np.linspace(-5, 5, num=250).reshape(-1,1)
predictions, covariances = gp.get_distribution(X_plot)

prediction_fig, pred_graph = plt.subplots()

# Envelope at predictions +/- covariances first, then the predicted curve
for band in (predictions + covariances, predictions - covariances):
    pred_graph.plot(X_plot, band, color='#E3CFCF')
pred_graph.plot(X_plot, predictions)

# Training points as green dots, test points as red crosses
pred_graph.scatter(X_train, y_train, marker='.', color='green')
pred_graph.scatter(X_test, y_test, marker='x', color='red')


In [None]:
# Periodic-kernel GP fitted to the Forrester training samples
gp = train_gp_periodic(
    X_train, y_train,
    variance=1, length_scale=1, period=2.5, noise_variacne=0,
)

# Query the fitted GP on a 250-point column grid over [-5, 5]
X_plot = np.linspace(-5, 5, num=250).reshape(-1,1)
predictions, covariances = gp.get_distribution(X_plot)

prediction_fig, pred_graph = plt.subplots()

# Envelope at predictions +/- covariances first, then the predicted curve
for band in (predictions + covariances, predictions - covariances):
    pred_graph.plot(X_plot, band, color='#E3CFCF')
pred_graph.plot(X_plot, predictions)

# Training points as green dots, test points as red crosses
pred_graph.scatter(X_train, y_train, marker='.', color='green')
pred_graph.scatter(X_test, y_test, marker='x', color='red')

In [None]:
# GP whose kernel is the ComposeMultiply combination of an RBF and a periodic kernel
p = Periodic(variance=1, length=3, period=1)
rbf = RadialBasisFunction(variance=1, length=1)
kernel = ComposeMultiply([rbf, p])

gp = GaussianProcessRegressor(kernel=kernel, noise_variance=0)
gp.train(X_train, y_train)

# Query the fitted GP on a 250-point column grid over [-5, 5]
X_plot = np.linspace(-5, 5, num=250).reshape(-1,1)
predictions, covariances = gp.get_distribution(X_plot)

prediction_fig, pred_graph = plt.subplots()

# Envelope at predictions +/- covariances first, then the predicted curve
for band in (predictions + covariances, predictions - covariances):
    pred_graph.plot(X_plot, band, color='#E3CFCF')
pred_graph.plot(X_plot, predictions)

# Training points as green dots, test points as red crosses
pred_graph.scatter(X_train, y_train, marker='.', color='green')
pred_graph.scatter(X_test, y_test, marker='x', color='red')

In [None]:
X_train, y_train, X_test, y_test = load_diabetes_sklearn(validation_size=0.1)

# Project onto 4 principal components, then standardise the projected features.
# Both transformers are fitted on the training split only and reused on the test split.
pca = PCA(n_components=4)
scaler = StandardScaler()

X_train = scaler.fit_transform(pca.fit_transform(X_train))
X_test = scaler.transform(pca.transform(X_test))


In [None]:
# Grid search over the RBF variance (v), RBF length (l) and GP noise variance (n),
# scored by the log marginal likelihood of the trained GP.
# NOTE: every linspace below uses num=1, so only its start value is evaluated;
# raise `num` to actually sweep the ranges.
best = -np.inf
best_config = {'v':0,'l':0,'n':0}
best_gp = None
for v in np.linspace(7.3, 50, num=1):
    for l in np.linspace(0.8, 100, num=1):
        for n in np.linspace(2.17, 3, num=1):
            rbf = RadialBasisFunction(variance=v, length=l)

            gp = GaussianProcessRegressor(kernel=rbf, noise_variance=n)
            gp.train(X_train, y_train)

            current = gp.log_marginal_likelihood
            if current != np.inf and current > best:
                best = current
                best_gp = gp
                best_config['v'] = v
                best_config['l'] = l
                best_config['n'] = n
                # Append every improvement so the search history survives kernel restarts
                with open('tunning.txt', 'a') as file:
                    file.write(f'Likelihood: {current}\nConfig: {best_config}\n\n')

# Evaluate the best model found rather than whichever one was trained last.
# If no candidate qualified, `gp` stays the last trained model.
if best_gp is not None:
    gp = best_gp

predictions, covariances = gp.get_distribution(X_test)

# Both outputs are indexed as column vectors (one row per test point).
predicted = predictions[:, 0]
variances = covariances[:, 0]
targets = np.asarray(y_test).reshape(-1)

# Absolute prediction error per test point. The original cell printed an
# undefined `errors`, which raised NameError on a fresh kernel.
errors = np.abs(predicted - targets)

# Two-sided p-value of each target under N(prediction, variance).
# NOTE(review): `covariances` is treated as the per-point predictive variance
# (matching the 'Variance' axis label below), so residuals are standardised by
# its square root — confirm against get_distribution.
z_scores = (targets - predicted) / np.sqrt(variances)
p_values = list(2 * norm.sf(np.abs(z_scores)))

# Mean absolute error on the test split
print(errors.mean())

ep_fig = plt.figure(figsize=(10,5))
ec_plot = ep_fig.add_subplot(111)

# Optional second panel (disabled): p-value against absolute error
#ep_plot = ep_fig.add_subplot(212)
# ep_plot.scatter(errors, p_values, marker='.')
# ep_plot.set_xlabel('Absolute Error')
# ep_plot.set_ylabel('p-value')

ec_plot.scatter(errors, variances, marker='.')
ec_plot.set_xlabel('Absolute Error')
ec_plot.set_ylabel('Variance')