Commit
new non_stat kernel and more docs in gp2Scale
MarcusMNoack committed Feb 10, 2023
1 parent 4ddf2a4 commit bafe85a
Showing 5 changed files with 289 additions and 44 deletions.
1 change: 1 addition & 0 deletions docs/source/index.md
@@ -24,6 +24,7 @@ caption: Examples
---
examples/single_task_test_notebook.ipynb
examples/multi_task_test_notebook.ipynb
examples/NoNStat.ipynb
```

# fvGP - A Flexible Multi-Task GP Engine
225 changes: 225 additions & 0 deletions examples/NoNStat.ipynb
@@ -0,0 +1,225 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# This example shows how fvGP can be used to set up non-stationary kernels "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## To train the GP using the non-staionary kernel via local or hybrid optimizers fvGP accepts the gradient of the kernel as a callable"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"#from gpcam.gp_optimizer import GPOptimizer\n",
"import matplotlib.pyplot as plt\n",
"from fvgp.gp import GP"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Make an example data set"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = np.linspace(0,1,1000)\n",
"def f(x):\n",
" return np.sin(5. * x) + np.cos(10. * x) + (2.* (x-0.4)**2) * np.cos(100. * x)\n",
"plt.plot(x,f(x))\n",
"x_data = np.random.rand(50)\n",
"y_data = f(x_data) + (np.random.rand(len(x_data))-0.5) * 0.5\n",
"plt.plot(x,f(x))\n",
"plt.scatter(x_data,y_data)\n",
"x_data = x_data.reshape(-1,1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Define some kernels "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#stationary\n",
"def kernel(x1,x2,hps,obj):\n",
" d = obj._get_distance_matrix(x1,x2)\n",
" #print(\"eval\")\n",
" return hps[0] * np.exp(-d**2/hps[1])\n",
" \n",
"#non-stationary and gradient\n",
"def nskernel(x1,x2,hps,obj):\n",
" d = obj._get_distance_matrix(x1,x2)\n",
" x0 = np.array([[0.],[0.5],[1.0]])\n",
" w = hps[1:-1]\n",
" l = hps[-1]\n",
" return obj.non_stat_kernel(x1,x2,x0,w,l) * np.exp(-d**2/hps[0])\n",
"\n",
"def nskernel_gradient(x1,x2,hps,obj):\n",
" d = obj._get_distance_matrix(x1,x2)\n",
" x0 = np.array([[0.],[0.5],[1.0]])\n",
" w = hps[1:-1]\n",
" l = hps[-1]\n",
" grad = np.empty((len(hps), len(x1),len(x2)))\n",
" grad[0] = obj.non_stat_kernel(x1,x2,x0,w,l) * ((d/hps[0])**2) * np.exp(-d**2/hps[0])\n",
" grad[1:] = obj.non_stat_kernel_gradient(x1,x2,x0,w,l) * np.exp(-d**2/hps[0])\n",
" return grad "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## First the stationary GP "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"my_gp1 = GP(1, x_data,y_data,np.ones((2)), gp_kernel_function=kernel)\n",
"my_gp1.train(np.array([[0.001,10.],[0.001,10.]]), method=\"global\")\n",
"\n",
"#let's make a prediction\n",
"mean1 = my_gp1.posterior_mean(x.reshape(-1,1))[\"f(x)\"]\n",
"var1 = my_gp1.posterior_covariance(x.reshape(-1,1))[\"v(x)\"]\n",
"#print(my_gp1.hyperparameters)\n",
"#now we can ask for a new point"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"my_gp1.hyperparameters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Wow the non-stationary GP. We will train globally so the gradient of the kernel is technically not required, but we will check it nontheless "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"my_gp2 = GP(1, x_data, y_data, np.ones((5)), \n",
" gp_kernel_function=nskernel, gp_kernel_function_grad=nskernel_gradient,\\\n",
" ram_economy=False)\n",
"\n",
"my_gp2.train(np.array([[0.001,10.],[-10.,10.],[-10.,10.],[-10.,10.],[0.1,10.]]), method = \"global\")\n",
"\n",
"\n",
"#let's make a prediction\n",
"mean2 = my_gp2.posterior_mean(x.reshape(-1,1))[\"f(x)\"]\n",
"var2 = my_gp2.posterior_covariance(x.reshape(-1,1))[\"v(x)\"]\n",
"#print(my_gp2.hyperparameters)\n",
"#now we can ask for a new point"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"a,b = my_gp2.test_log_likelihood_gradient(np.ones((5)))"
]
},
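{
"cell_type": "markdown",
"metadata": {},
"source": [
"## We can also check the kernel gradient itself against a forward finite difference. This is a minimal sketch: it reuses `nskernel`, `nskernel_gradient`, and `my_gp2` from above, and perturbs only the first hyperparameter"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eps = 1e-6\n",
"hps = np.ones(5)\n",
"hps_eps = hps.copy()\n",
"hps_eps[0] += eps\n",
"xs = x_data[:5]\n",
"#forward finite difference w.r.t. hps[0]\n",
"fd = (nskernel(xs, xs, hps_eps, my_gp2) - nskernel(xs, xs, hps, my_gp2)) / eps\n",
"#first slice of the H x V x V analytic gradient (ram_economy=False convention)\n",
"analytic = nskernel_gradient(xs, xs, hps, my_gp2)[0]\n",
"print(\"max abs difference: \", np.max(np.abs(fd - analytic)))"
]
},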
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## We'll plot both models, starting eith the stationary. In most cases the non-stationary GP has half the approximation error "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize = (16,10))\n",
"plt.plot(x,mean1, label = \"posterior mean\")\n",
"plt.plot(x,f(x), label = \"latent function\")\n",
"plt.fill_between(x, mean1 - 3. * np.sqrt(var1), mean1 + 3. * np.sqrt(var1), alpha = 0.5, color = \"grey\", label = \"var\")\n",
"plt.scatter(x_data,y_data)\n",
"plt.legend()\n",
"print(\"error: \", np.sum(f(x)-mean1)**2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize = (16,10))\n",
"plt.plot(x,mean2, label = \"posterior mean\", linewidth = 4)\n",
"plt.plot(x,f(x), label = \"latent function\", linewidth = 4)\n",
"plt.fill_between(x, mean2 - 3. * np.sqrt(var2), mean2 + 3. * np.sqrt(var2), alpha = 0.5, color = \"grey\", label = \"var\")\n",
"plt.scatter(x_data,y_data)\n",
"plt.legend()\n",
"print(\"error: \", np.sum(f(x)-mean2)**2)"
]
},
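{
"cell_type": "markdown",
"metadata": {},
"source": [
"## To quantify the comparison, we can compute the ratio of the two squared errors, using `mean1` and `mean2` from above"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"err1 = np.sum((f(x) - mean1)**2)\n",
"err2 = np.sum((f(x) - mean2)**2)\n",
"print(\"stationary GP error: \", err1)\n",
"print(\"non-stationary GP error: \", err2)\n",
"print(\"error ratio (stationary / non-stationary): \", err1 / err2)"
]
},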
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "fvgp",
"language": "python",
"name": "fvgp"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
7 changes: 6 additions & 1 deletion fvgp/fvgp.py
@@ -79,6 +79,9 @@ class provides all the methods described for the GP class.
False. Note, the training will always use a linear solve instead of the inverse for stability reasons.
ram_economy : bool, optional
Only of interest if the gradient and/or Hessian of the marginal log_likelihood is/are used for the training.
args : user defined, optional
These optional arguments will be available as the attribute args in kernel and mean function definitions.
"""
def __init__(
@@ -98,13 +101,15 @@ def __init__(
gp_mean_function_grad = None,
normalize_y = False,
use_inv = False,
ram_economy = True
ram_economy = True,
args = None,
):

self.x_data = np.array(points)
self.y_data = np.array(values)
self.input_space_dim = input_space_dim
self.point_number, self.output_num, self.output_dim = len(points), output_number, output_space_dim
if args: self.args = args
###check the output dims
if np.ndim(values) == 1:
raise ValueError("the output number is 1, you can use GP for single-task GPs")
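The new args parameter threads user-defined state into kernel and prior-mean callables through the args attribute of the GP object they receive. A minimal sketch of the intended usage, shown with the single-task GP class, which gains the same keyword in the diff below; the kernel body and the "length_scale_offset" key are illustrative assumptions, not part of this commit:

import numpy as np
from fvgp.gp import GP

def kernel_with_args(x1, x2, hps, obj):
    # obj.args is set in __init__ when a truthy args is passed;
    # "length_scale_offset" is a hypothetical payload key
    d = obj._get_distance_matrix(x1, x2)
    return hps[0] * np.exp(-d**2 / (hps[1] + obj.args["length_scale_offset"]))

# x_data is an (N,1) array and y_data an (N,) array, as in the example notebook
my_gp = GP(1, x_data, y_data, np.ones(2),
           gp_kernel_function=kernel_with_args,
           args={"length_scale_offset": 0.1})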
18 changes: 13 additions & 5 deletions fvgp/gp.py
@@ -81,6 +81,8 @@ class GP():
and return a 2-D numpy array of shape V x V.
If ram_economy=False, the function should be of the form f(points1, points2, hyperparameters) and return a numpy array of shape
H x V x V, where H is the number of hyperparameters. V is the number of points. CAUTION: This array will be stored and is very large.
args : user defined, optional
These optional arguments will be available as the attribute args in kernel and mean function definitions.
@@ -110,7 +112,7 @@ def __init__(
normalize_y = False,
use_inv = False,
ram_economy = True,
non_stat_params = None,
args = None,
):
if input_space_dim != len(points[0]):
raise ValueError("input space dimensions are not in agreement with the point positions given")
@@ -123,6 +125,7 @@
self.y_data = np.array(values)
self.compute_device = compute_device
self.ram_economy = ram_economy
if args: self.args = args

self.use_inv = use_inv
self.K_inv = None
@@ -646,16 +649,21 @@ def log_likelihood_hessian(self, hyperparameters):
def test_log_likelihood_gradient(self,hyperparameters):
thps = np.array(hyperparameters)
grad = np.empty((len(thps)))
eps = 1e-4
eps = 1e-6
for i in range(len(thps)):
thps_aux = np.array(thps)
thps_aux[i] = thps_aux[i] + eps
grad[i] = (self.log_likelihood(thps_aux) - self.log_likelihood(thps))/eps
analytical = self.log_likelihood_gradient(thps)
if np.linalg.norm(grad-analytical) > 1e-1:
if np.linalg.norm(grad-analytical) > np.linalg.norm(grad)/100.0:
print("Gradient possibly wrong")
print(grad)
print(analytical)
else:
print("Gradient correct")
print(grad)
print(analytical)

return grad, analytical
##################################################################################
##################################################################################
@@ -1762,7 +1770,7 @@ def _dgdl(self,x,x0,w,l):
##################################################################################
def d_gp_kernel_dx(self, points1, points2, direction, hyperparameters):
new_points = np.array(points1)
epsilon = 1e-6
epsilon = 1e-8
new_points[:,direction] += epsilon
a = self.kernel(new_points, points2, hyperparameters,self)
b = self.kernel(points1, points2, hyperparameters,self)
@@ -1772,7 +1780,7 @@ def d_gp_kernel_dh(self, points1, points2, direction, hyperparameters):
def d_gp_kernel_dh(self, points1, points2, direction, hyperparameters):
new_hyperparameters1 = np.array(hyperparameters)
new_hyperparameters2 = np.array(hyperparameters)
epsilon = 1e-6
epsilon = 1e-8
new_hyperparameters1[direction] += epsilon
new_hyperparameters2[direction] -= epsilon
a = self.kernel(points1, points2, new_hyperparameters1,self)
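d_gp_kernel_dh now perturbs the chosen hyperparameter in both directions with a smaller step, i.e. a central difference, which is second-order accurate in epsilon. A standalone sketch of that scheme, under the assumption that the method returns the symmetric slope (the helper name and test function are illustrative, not from this file):

import numpy as np

def central_difference(f, h, direction, epsilon=1e-8):
    # symmetric perturbation of one coordinate of h
    e = np.zeros(len(h))
    e[direction] = epsilon
    return (f(h + e) - f(h - e)) / (2.0 * epsilon)

# example: derivative of h[0]**2 * h[1] w.r.t. h[0] at (1, 2) is 4
print(central_difference(lambda h: h[0]**2 * h[1], np.array([1.0, 2.0]), 0))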
