# GPs on Non-Euclidean Input Spaces
GPs on non-Euclidean input spaces have become more and more relevant in recent years. fvgp can be used for that purpose as long as a valid kernel is provided. Of course, if mean functions and noise functions are also provided, they have to operate on these non-Euclidean spaces. 

In this example, we run a small GP on words. It's a proof of concept; the results themselves are not particularly meaningful.

In [1]:
#install the newest version of fvgp
#!pip install fvgp~=4.7.2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from fvgp import GP
from dask.distributed import Client
%load_ext autoreload
%autoreload 2

## Single Task

In [3]:
# x_data is a plain Python list, so its entries can be arbitrary objects or
# structures (here: words); the kernel alone decides how two entries are compared.
x_data = ['hello','world','this','is','fvgp']
y_data = np.array([2.,1.9,1.8,3.0,5.])


# import only the kernel building block used below rather than `import *`,
# so it stays visible where the name comes from
from fvgp.kernels import matern_kernel_diff1
def string_distance(string1, string2):
    """Distance between two strings used as the GP input metric.

    The distance is the difference in length plus the number of positions,
    within the common prefix length, at which the characters differ.

    Parameters
    ----------
    string1, string2 : str
        The two strings to compare.

    Returns
    -------
    int or float
        The length difference (an int) when no character differs, otherwise
        a float, because every mismatch contributes ``1.``.
    """
    length_gap = abs(len(string1) - len(string2))
    # zip stops at the shorter string, i.e. it only walks the common length
    mismatches = sum(1. for char1, char2 in zip(string1, string2) if char1 != char2)
    return length_gap + mismatches


def kernel(x1,x2,hps):
    """Kernel on collections of strings, built from ``string_distance``.

    Parameters
    ----------
    x1, x2 : sequences of str
        The two collections of inputs to compare.
    hps : array-like with at least two entries
        ``hps[0]`` scales the kernel multiplicatively; ``hps[1]`` is passed as
        the second argument of ``matern_kernel_diff1`` (presumably its length
        scale -- confirm against the fvgp documentation).

    Returns
    -------
    The value of ``hps[0] * matern_kernel_diff1(d, hps[1])`` where ``d`` is the
    ``len(x1) x len(x2)`` matrix of pairwise string distances.
    """
    distances = np.zeros((len(x1),len(x2)))
    for row, word_a in enumerate(x1):
        for col, word_b in enumerate(x2):
            distances[row,col] = string_distance(word_a,word_b)
    return hps[0] * matern_kernel_diff1(distances,hps[1])
    



# Build the GP directly on the list of words; the custom string kernel is what
# makes the non-numeric inputs usable. Both hyperparameters start at 1.
my_gp = GP(x_data,y_data,init_hyperparameters=np.ones((2)), kernel_function=kernel)

# one row per hyperparameter (presumably [lower, upper] -- confirm against the fvgp docs)
bounds = np.array([[0.001,100.],[0.001,100]])
my_gp.train(hyperparameter_bounds=bounds)

# Inspect the trained hyperparameters and query the posterior at the unseen word 'full'.
print("hyperparameters: ", my_gp.hyperparameters)
print("prediction : ",my_gp.posterior_mean(['full'])["m(x)"])
# square root of the "v(x)" entry (presumably the posterior variance, making this a standard deviation)
print("uncertainty: ",np.sqrt(my_gp.posterior_covariance(['full'])["v(x)"]))

hyperparameters:  [67.57968103 37.56308367]
prediction :  [3.36628537]
uncertainty:  [0.63285188]


  self.likelihood = GPlikelihood(self.data,


## Multi-Task Capability on Non-Euclidean Spaces

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from fvgp import GP
import plotly.graph_objects as go
from itertools import product

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
x_data = ['frf','ferfe','ferf','febhn']

# Synthetic targets: one row per word and 5 output columns, drawn uniformly
# from [0, 1). A seeded generator makes the data identical on every
# "Restart Kernel -> Run All"; the unseeded global RNG did not.
rng = np.random.default_rng(42)
y_data = rng.random((len(x_data),5))

#it is good practice to check the format of the data
print(len(x_data))
print(y_data.shape)

4
(4, 5)


In [6]:
def string_distance(string1, string2):
    """Count how far apart two strings are.

    Starts from the absolute difference of the two lengths and adds ``1.`` for
    every position of the shorter string whose character differs from the
    character at the same position of the longer one.

    Returns an int when no character differs, and a float otherwise.
    """
    shorter, longer = sorted((string1, string2), key=len)
    difference = len(longer) - len(shorter)

    # only the overlapping positions can be compared character by character
    for position, char in enumerate(shorter):
        if char != longer[position]:
            difference += 1.

    return difference


from fvgp.kernels import matern_kernel_diff1
def kernel(x1,x2,hps):
    """String kernel for the multi-task GP.

    Every element of ``x1`` and ``x2`` is indexed with ``[0]`` to obtain the
    string to compare. Any further components of an element are ignored here;
    the ``x_pred`` output printed by ``posterior_mean`` in this notebook shows
    ``[word, task_index]`` pairs, so the task index presumably sits in
    position 1 and does not influence this kernel.

    Parameters
    ----------
    x1, x2 : sequences whose elements carry a string in position 0
    hps : array-like with at least two entries
        ``hps[0]`` scales the kernel multiplicatively; ``hps[1]`` is passed as
        the second argument of ``matern_kernel_diff1`` (presumably its length
        scale -- confirm against the fvgp documentation).

    Returns
    -------
    The value of ``hps[0] * matern_kernel_diff1(d, hps[1])`` where ``d`` is the
    ``len(x1) x len(x2)`` matrix of pairwise string distances.
    """
    distances = np.zeros((len(x1),len(x2)))
    for row, point_a in enumerate(x1):
        word_a = point_a[0]
        for col, point_b in enumerate(x2):
            distances[row,col] = string_distance(word_a,point_b[0])
    return hps[0] * matern_kernel_diff1(distances,hps[1])
    

# later handed to train() as hyperparameter_bounds: one row per hyperparameter
# (presumably [lower, upper] -- confirm against the fvgp docs)
bounds = np.array([[0.001,100.],[0.001,100]])

In [7]:
from fvgp import fvGP
# Multi-task GP on the word inputs: y_data has one column per output, and the
# custom kernel handles the non-numeric inputs. Both hyperparameters start at 1.
my_gp2 = fvGP(x_data,y_data,init_hyperparameters=np.ones((2)),
              kernel_function=kernel
             )
print("Global Training in progress")
# max_iter = 20 presumably caps the optimizer iterations to keep the demo short;
# as the last expression of the cell, the value returned by train() is displayed
my_gp2.train(hyperparameter_bounds=bounds, max_iter = 20)

Global Training in progress


array([40.30481518, 56.91574387])

In [8]:
# two unseen words to predict at
x_pred = ["dwed","dwe"]
# NOTE(review): the result of this call is neither stored nor displayed (only the
# last expression of a cell is shown); x_out presumably selects the tasks to predict.
my_gp2.posterior_mean(x_pred, x_out = np.array([0,1,2,3]))
# without x_out; this is the result the notebook displays
my_gp2.posterior_mean(x_pred)

{'x': ['dwed', 'dwe'],
 'm(x)': array([[2.92245968, 2.92245968, 2.92245968, 2.92245968, 2.92245968],
        [1.77974417, 1.77974417, 1.77974417, 1.77974417, 1.77974417]]),
 'm(x)_flat': array([2.92245968, 1.77974417, 2.92245968, 1.77974417, 2.92245968,
        1.77974417, 2.92245968, 1.77974417, 2.92245968, 1.77974417]),
 'x_pred': [['dwed', np.int64(0)],
  ['dwe', np.int64(0)],
  ['dwed', np.int64(1)],
  ['dwe', np.int64(1)],
  ['dwed', np.int64(2)],
  ['dwe', np.int64(2)],
  ['dwed', np.int64(3)],
  ['dwe', np.int64(3)],
  ['dwed', np.int64(4)],
  ['dwe', np.int64(4)]]}