Commit

Merge 004915e into 2b90bc3

theogf committed Jul 19, 2022
2 parents 2b90bc3 + 004915e commit 8e120b5

Showing 5 changed files with 150 additions and 108 deletions.
40 changes: 24 additions & 16 deletions docs/examples/gpclassification.jl
@@ -17,39 +17,47 @@ data.Class[data.Class .== 2] .= -1
data = Matrix(data)
X = data[:, 1:2]
Y = Int.(data[:, end]);
(X_train, y_train), (X_test, y_test) = splitobs((X, Y), 0.5, ObsDim.First())
# ### We create a function to visualize the data
(X_train, y_train), (X_test, y_test) = splitobs((X, Y), 0.5, ObsDim.First()) # We split the data into train and test set

# ### We create a function to visualize the data
function plot_data(X, Y; size=(300, 500))
return Plots.scatter(
eachcol(X)...; group=Y, alpha=0.2, markerstrokewidth=0.0, lab="", size=size
eachcol(X)...; xlabel="x", ylabel="y", group=Y, alpha=0.2, markerstrokewidth=0.0, lab="", size=size
)
end
plot_data(X, Y; size=(500, 500))
plot_data(X, Y; size=(500, 800))

# ### Run sparse classification with increasing number of inducing points
Ms = [4, 8, 16, 32, 64]
# ## Model initialization and training
# Binary classification with Gaussian processes is usually formulated as
# ```math
# y \sim \mathrm{Bernoulli}(h(f))
# ```
# where ``h`` is the inverse link.
# Multiple choices exist for ``h`` but we will focus mostly on ``h(x)=\sigma(x)=(1+\exp(-x))^{-1}``, i.e. the logistic function.
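# As a quick illustration (a minimal sketch, not part of the original example, using a
# hypothetical helper name), the logistic inverse link maps latent values to probabilities in ``(0, 1)``:
logistic_link(f) = 1 / (1 + exp(-f)) # logistic inverse link
logistic_link.([-5.0, 0.0, 5.0]) # ≈ [0.0067, 0.5, 0.9933]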
# ### Run sparse classification with an increasing number of inducing points
Ms = [4, 8, 16, 32, 64] # Number of inducing points
models = Vector{AbstractGPModel}(undef, length(Ms) + 1)
kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)
kernel = with_lengthscale(SqExponentialKernel(), 1.0) # We create a standard kernel with lengthscale 1
for (i, num_inducing) in enumerate(Ms)
@info "Training with $(num_inducing) points"
m = SVGP(
kernel,
LogisticLikelihood(),
AnalyticVI(),
inducingpoints(KmeansAlg(num_inducing), X);
optimiser=false,
Zoptimiser=false,
inducingpoints(KmeansAlg(num_inducing), X); # Z is selected via the kmeans algorithm
optimiser=false, # We keep the kernel parameters fixed
Zoptimiser=false, # We keep the inducing points locations fixed
)
@time train!(m, X_train, y_train, 20)
models[i] = m
@time train!(m, X_train, y_train, 5) # We train the model on the training data for 5 iterations
models[i] = m # And store the model
end
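# As a rough guide, the sparse SVGP model above works with ``M`` inducing points and costs
# on the order of ``O(NM^2 + M^3)`` per iteration, whereas the full VGP model below scales
# as ``O(N^3)``, which is why the sparse variant is preferred for larger datasets.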
# ### Running the full model
# ### We initialize and train the non-sparse model
@info "Running full model"
mfull = VGP(X_train, y_train, kernel, LogisticLikelihood(), AnalyticVI(); optimiser=false)
@time train!(mfull, 5)
models[end] = mfull

# ## Prediction visualization
# ### We create a prediction and plot function on a grid
function compute_grid(model, n_grid=50)
mins = [-3.25, -2.85]
@@ -95,9 +103,9 @@ Plots.plot(
plot_model.(models, Ref(X), Ref(Y))...; layout=(1, length(models)), size=(1000, 200)
)
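# `Ref(X)` and `Ref(Y)` wrap the data so that the broadcast runs over `models` only;
# each `plot_model` call receives the full `X` and `Y` rather than their elements.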

# ## Bayesian SVM vs Logistic
# ### We now create a model with the Bayesian SVM likelihood

# ## Bayesian SVM vs Logistic link
# ### We now create another full model but with the Bayesian SVM link
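# The Bayesian SVM uses a hinge-loss-based pseudo-likelihood, roughly of the form
# ```math
# p(y \mid f) \propto \exp(-2\max(0, 1 - y f)),
# ```
# so the resulting classifier behaves like a probabilistic version of a classical SVM.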
@info "Running model with Bayesian SVM Likelihood"
mbsvm = VGP(X_train, y_train, kernel, BayesianSVM(), AnalyticVI(); optimiser=false)
@time train!(mbsvm, 5)
# ### And compare it with the Logistic likelihood
111 changes: 67 additions & 44 deletions docs/examples/gpevents.jl
@@ -3,67 +3,90 @@
# ## Preliminary steps
#
# ### Loading necessary packages
using Plots
using AugmentedGaussianProcesses
using Distributions
using Plots
default(msw=0.0, alpha=0.5, lw=3.0)

# ## Creating some random data
# ### Creating some random data
# We first create some random data for the negative binomial problem
# using the generative model
n_data = 200
X = (rand(n_data) .- 0.5) * 40
x = (rand(n_data) .- 0.5) * 20
r = 5.0
Y = rand.(NegativeBinomial.(r, AGP.logistic.(sin.(X))))
scatter(X, Y)
y_bin = rand.(NegativeBinomial.(r, AGP.logistic.(-sin.(x))))
# We use `-sin` because Distributions.jl and Wikipedia use different conventions for the negative binomial parametrization.
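# As a quick check of the convention used here (a small sketch relying on `mean` from
# Distributions.jl): with this parametrization a larger probability parameter yields
# *fewer* counts on average.
mean(NegativeBinomial(r, 0.9)), mean(NegativeBinomial(r, 0.1)) # ≈ (0.56, 45.0)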
# We do the same for the Poisson problem
λ = 10.0
y_poisson = rand.(Poisson.(λ * AGP.logistic.(sin.(x))))
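# In other words, the generative model for the Poisson data is
# ```math
# y \sim \mathrm{Poisson}(\lambda\,\sigma(f(x))), \quad f(x) = \sin(x),
# ```
# so the rate is a logistic transform of the latent function, bounded above by ``\lambda``.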
# And visualize the resulting data
plot(
scatter(x, y_bin; lab="", title="Negative Binomial"),
scatter(x, y_poisson; lab="", title="Poisson");
xlabel="x",
ylabel="y"
)

# ## Run GP model with negative binomial likelihood to learn p
kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)
# ## Initialization and training of the models
# ### Run GP model with negative binomial likelihood to learn p
kernel = with_lengthscale(SqExponentialKernel(), 1.0)
m_negbinomial = VGP(
X, Y, kernel, NegBinomialLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
x, y_bin, kernel, NegBinomialLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
)
@time train!(m_negbinomial, 20)
@time train!(m_negbinomial, 5); # Train the model for 5 iterations

# ## Running the same model but with a Poisson likelihood
kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)
# ### Running the same model but with the Poisson likelihood
kernel = with_lengthscale(SqExponentialKernel(), 1.0)
m_poisson = VGP(
X, Y, kernel, PoissonLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
x, y_poisson, kernel, PoissonLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
)
@time train!(m_poisson, 20)
@time train!(m_poisson, 5);

# Prediction and plot function on a grid
# Create a grid and compute prediction on it
function compute_grid(model, n_grid=50)
mins = -20
maxs = 20
# ## Prediction and plot function on a grid
# Create a grid and compute the predictive mean of `y` on it
function compute_pred_y_grid(model, n_grid=50)
mins = -12
maxs = 12
x_grid = range(mins, maxs; length=n_grid) # Create a grid
y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1)) # Predict the mean and variance on the grid
y_grid, sig_y_grid = proba_y(model, x_grid) # Predict the mean and variance on the grid
return y_grid, sig_y_grid, x_grid
end

# Create a grid and compute the predictive mean of `f` on it
function compute_pred_f_grid(model, n_grid=50)
mins = -12
maxs = 12
x_grid = range(mins, maxs; length=n_grid) # Create a grid
f_grid, f_var_grid = predict_f(model, x_grid; cov=true) # Predict the mean and variance on the grid
return f_grid, f_var_grid, x_grid
end
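# Note the difference between the two helpers above: `predict_f` returns the posterior
# mean and (co)variance of the latent GP ``f``, while `proba_y` pushes the posterior
# through the likelihood and returns the predictive mean and variance of ``y`` itself.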
# Plot the data as a scatter plot
function plot_data(X, Y)
return Plots.scatter(X, Y; alpha=0.33, msw=0.0, lab="", size=(800, 500))
function plot_data(x, y)
return Plots.scatter(x, y; lab="", size=(800, 500))
end

function plot_f(model)
f, f_var, x_grid = compute_pred_f_grid(model, 100)
plot(x_grid, sin; lab="sin", title="Latent f")
plot!(x_grid, f; ribbon=2 * sqrt.(f_var), lab="f")
end

function plot_model(model, X, Y, title=nothing)
n_grid = 100
y_grid, sig_y_grid, x_grid = compute_grid(model, n_grid)
p = plot_data(X, Y)
Plots.plot!(
p,
x_grid,
y_grid;
ribbon=2 * sqrt.(sig_y_grid), # Plot 2 std deviations
title=title,
color="red",
lab="",
linewidth=3.0,
)
return p
end;
function plot_y(model, x, y_bin)
pred_y, pred_y_var, x_grid = compute_pred_y_grid(model, 100)
true_p_y = model.likelihood.(sin.(x_grid))
true_y, true_var_y = mean.(true_p_y), var.(true_p_y)
plot_data(x, y_bin)
plot!(x_grid, true_y; ribbon=2 * sqrt.(true_var_y), lab="true p(y|f)", title="Expectation of y")
plot!(x_grid, pred_y; ribbon=2 * sqrt.(pred_y_var), label="pred p(y|f)")
end

# ## Comparison between the two likelihoods
Plots.plot(
plot_model.(
[m_negbinomial, m_poisson], Ref(X), Ref(Y), ["Neg. Binomial", "Poisson"]
)...;
layout=(1, 2),
# ## Plotting the comparison results
## We first compare the results for the Negative Binomial likelihood
plot(
plot_f(m_negbinomial),
plot_y(m_negbinomial, x, y_bin)
)
## And we do the same for the Poisson likelihood
plot(
plot_f(m_poisson),
plot_y(m_poisson, x, y_poisson)
)
51 changes: 26 additions & 25 deletions docs/examples/gpregression.jl
@@ -5,18 +5,18 @@ using AugmentedGaussianProcesses
using Distributions
using Plots

# We create a toy dataset with X ∈ [-20, 20] and y = 5 * sinc(X)
# We create a toy dataset with `x ∈ [-10, 10]` and `y = 5 * sinc(x)`
N = 1000
X = reshape((sort(rand(N)) .- 0.5) * 40.0, N, 1)
σ = 0.01
x = (sort(rand(N)) .- 0.5) * 20.0
σ = 0.01 # Standard deviation of the Gaussian noise

function latent(x)
return 5.0 * sinc.(x)
end
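# (Recall that Julia's `sinc(x)` is the normalized `sin(πx) / (πx)`, with `sinc(0) = 1`.)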
Y = vec(latent(X) + σ * randn(N));
y = latent(x) + σ * randn(N);

# Visualization of the data:
scatter(X, Y; lab="")
scatter(x, y; lab="")

# ## Gaussian noise

@@ -28,27 +28,27 @@ Ms = [4, 8, 16, 32, 64];
# Create an empty array of GPs
models = Vector{AbstractGPModel}(undef, length(Ms) + 1);
# Chose a kernel
kernel = SqExponentialKernel();# + PeriodicKernel()
kernel = with_lengthscale(SqExponentialKernel(), 1.0);
# And Run sparse classification with an increasing number of inducing points
for (index, num_inducing) in enumerate(Ms)
@info "Training with $(num_inducing) points"
m = SVGP(
kernel, # Kernel
GaussianLikelihood(σ), # Likelihood used
AnalyticVI(), # Inference used to solve the problem
inducingpoints(KmeansAlg(num_inducing), X); # Inducing points initialized with kmeans
range(-10, 10; length=num_inducing); # Simple range
optimiser=false, # Keep kernel parameters fixed
Zoptimiser=false, # Keep inducing points locations fixed
)
@time train!(m, X, Y, 100) # Train the model for 100 iterations
@time train!(m, x, y, 100) # Train the model for 100 iterations
models[index] = m # Save the model in the array
end
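# Placing the inducing points on an evenly spaced range is a reasonable choice for 1-D
# inputs; for higher-dimensional data an algorithm such as
# `inducingpoints(KmeansAlg(num_inducing), X)` (as in the classification example) is
# typically preferred.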

# Train the model without any inducing points (no approximation)
@info "Training with full model"
mfull = GP(
X,
Y,
x,
y,
kernel;
noise=σ,
opt_noise=false, # Keep the noise value fixed
@@ -59,19 +59,19 @@ models[end] = mfull;

# Create a grid and compute prediction on it
function compute_grid(model, n_grid=50)
mins = -20
maxs = 20
mins = -10
maxs = 10
x_grid = range(mins, maxs; length=n_grid) # Create a grid
y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1)) # Predict the mean and variance on the grid
return y_grid, sig_y_grid, x_grid
end;

# Plot the data as a scatter plot
function plotdata(X, Y)
return Plots.scatter(X, Y; alpha=0.33, msw=0.0, lab="", size=(300, 500))
function plotdata(x, y)
return Plots.scatter(x, y; alpha=0.33, msw=0.0, lab="", size=(300, 500))
end

function plot_model(model, X, Y, title=nothing)
function plot_model(model, x, y, title=nothing)
n_grid = 100
y_grid, sig_y_grid, x_grid = compute_grid(model, n_grid)
title = if isnothing(title)
@@ -80,7 +80,7 @@ function plot_model(model, X, Y, title=nothing)
title
end

p = plotdata(X, Y)
p = plotdata(x, y)
Plots.plot!(
p,
x_grid,
@@ -94,9 +94,9 @@ function plot_model(model, X, Y, title=nothing)
if model isa SVGP # Plot the inducing points as well
Plots.plot!(
p,
vec(model.f[1].Z),
zeros(dim(model.f[1]));
msize=2.0,
model.f[1].Z,
mean(model.f[1]);
msize=5.0,
color="black",
t=:scatter,
lab="",
@@ -106,7 +106,7 @@ end;
end;

Plots.plot(
plot_model.(models, Ref(X), Ref(Y))...; layout=(1, length(models)), size=(1000, 200)
plot_model.(models, Ref(x), Ref(y))...; layout=(2, 3), size=(800, 600)
) # Plot all models and combine the plots

# ## Non-Gaussian Likelihoods
@@ -124,8 +124,8 @@ ngmodels = Vector{AbstractGPModel}(undef, length(likelihoods) + 1)
for (i, l) in enumerate(likelihoods)
@info "Training with the $(l)" # We need to use VGP
m = VGP(
X,
Y, # First arguments are the input and output
x,
y, # First arguments are the input and output
kernel, # Kernel
l, # Likelihood used
AnalyticVI(); # Inference used to solve the problem
@@ -139,8 +139,9 @@ ngmodels[end] = models[end] # Add the Gaussian model
# We can now repeat the prediction from before :
Plots.plot(
plot_model.(
ngmodels, Ref(X), Ref(Y), ["Student-T", "Laplace", "Heteroscedastic", "Gaussian"]
ngmodels, Ref(x), Ref(y), ["Student-T", "Laplace", "Heteroscedastic", "Gaussian"]
)...;
layout=(2, 2),
size=(1000, 200),
) # Plot all models and combine the plots
ylims=(-8, 10),
size=(700, 400),
) # Plot all models and combine the plots
