v0.11.5/search_index.js

var documenterSearchIndex = {"docs":
[{"location":"comparison/#Julia-GP-Package-Comparison","page":"Julia GP Packages","title":"Julia GP Package Comparison","text":"","category":"section"},{"location":"comparison/#JuliaGaussianProcesses-Organization","page":"Julia GP Packages","title":"JuliaGaussianProcesses Organization","text":"","category":"section"},{"location":"comparison/","page":"Julia GP Packages","title":"Julia GP Packages","text":"There is a common effort to bring the GP people together through the JuliaGP organization. We work on making the building blocks necessary for GP such as KernelFunctions.jl for kernels, AbstractGPs.jl for the basic GP definitions and more is coming. The long-term goal is to have AGP.jl depend on all of this elements and to use it as a wrapper.","category":"page"},{"location":"comparison/#This-comparison-is-now-quite-outdated-and-new-solutions-have-been-introduced","page":"Julia GP Packages","title":"🚧 This comparison is now quite outdated and new solutions have been introduced 🚧","text":"","category":"section"},{"location":"comparison/#AugmentedGaussianProcesses.jl-vs-[Stheno.jl](https://github.com/willtebbutt/Stheno.jl)-vs-[GaussianProcesses.jl](https://github.com/STOR-i/GaussianProcesses.jl)","page":"Julia GP Packages","title":"AugmentedGaussianProcesses.jl vs Stheno.jl vs GaussianProcesses.jl","text":"","category":"section"},{"location":"comparison/","page":"Julia GP Packages","title":"Julia GP Packages","text":"There are already two other Gaussian Process packages in Julia, however their feature are quite orthogonal. They are roughly compared here: AGP.jl stands for AugmentedGaussianProcesses.jl and GP.jl for GaussianProcesses.jl","category":"page"},{"location":"comparison/#Likelihood","page":"Julia GP Packages","title":"Likelihood","text":"","category":"section"},{"location":"comparison/","page":"Julia GP Packages","title":"Julia GP Packages","text":"Likelihood AGP.jl Stheno.jl GP.jl\nGaussian ✓ ✓ (multi-input/multi-output) ✓\nStudent-T ✓ ✖ ✓\nBernoulli ✓ (Logistic) ✖ ✓ (Probit)\nBayesian-SVM ✓ ✖ ✖\nPoisson ✓ ✖ ✓\nNegativeBinomial ✓ ✖ ✖\nExponential ✖ ✖ ✓\nMultiClass ✓ ✖ ✖","category":"page"},{"location":"comparison/#Inference","page":"Julia GP Packages","title":"Inference","text":"","category":"section"},{"location":"comparison/","page":"Julia GP Packages","title":"Julia GP Packages","text":"Inference AGP.jl Stheno.jl GP.jl\nAnalytic (Gaussian only) ✓ ✓ ✓\nVariational Inference ✓ (Analytic and Num. Appr.) ✖ ✖\nStreaming VI ✓ ✖ ✖\nGibbs-Sampling ✓ ✖ ✖\nMCMC ✖ ✖ ✓\nExpec. Propag. ✖ ✖ ✖","category":"page"},{"location":"comparison/#Kernels","page":"Julia GP Packages","title":"Kernels","text":"","category":"section"},{"location":"comparison/","page":"Julia GP Packages","title":"Julia GP Packages","text":"Kernel AGP.jl Stheno.jl GP.jl\nRBF/Squared Exponential ✓ ✓ ✓\nMatern ✓ ✖ ✓\nConst ✖ ✓ ✓\nLinear ✖ ✓ ✓\nPoly ✖ ✓ ✓\nPeriodic ✖ ✖ ✓\nExponentiated Quadratic ✖ ✓ ✖\nRational Quadratic ✖ ✓ ✓\nWiener ✖ ✓ ✖\nSum of kernel ✖ ✖ ✓\nProduct of kernels ✖ ✖ ✓","category":"page"},{"location":"comparison/","page":"Julia GP Packages","title":"Julia GP Packages","text":"Note that the kernels will be defered to MLKernels.jl in the future.","category":"page"},{"location":"comparison/#Other-features","page":"Julia GP Packages","title":"Other features","text":"","category":"section"},{"location":"comparison/","page":"Julia GP Packages","title":"Julia GP Packages","text":"Feature AGP.jl Stheno.jl GP.jl\nSparse GP ✓ ✖ ✓\nCustom prior Mean ✓ ✓ ✓\nHyperparam. Opt. ✓ ? ✓\nMultiOutput ✓ ✓ ✖\nOnline ✓ ✖ ✖","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"EditURL = \"https://github.com/theogf/AugmentedGaussianProcesses.jl/blob/master/docs/examples/heteroscedastic.jl\"","category":"page"},{"location":"examples/heteroscedastic/#Gaussian-Process-with-Heteroscedastic-likelihoods","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"","category":"section"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"(Image: )","category":"page"},{"location":"examples/heteroscedastic/#Loading-necessary-packages","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Loading necessary packages","text":"","category":"section"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"using AugmentedGaussianProcesses\nusing Distributions\nusing LinearAlgebra\nusing Plots\nusing Random\ndefault(; lw=3.0, msw=0.0)","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"using CairoMakie","category":"page"},{"location":"examples/heteroscedastic/#Model-generated-data","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Model generated data","text":"","category":"section"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"The heteroscedastic noise mean that the variance of the likelihood directly depends on the input. To model this with Gaussian process, we define a GP f for the mean and another GP g for the variance y sim f + epsilon where epsilon sim mathcalN(0 (lambda sigma(g))^-1) We create a toy dataset with X ∈ [-10, 10] and sample f, g and y given this same generative model","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"rng = MersenneTwister(42)\nN = 200\nx = (sort(rand(rng, N)) .- 0.5) * 20.0\nx_test = range(-10, 10; length=500)\nkernel = 5.0 * SqExponentialKernel() ∘ ScaleTransform(1.0) # Kernel function\nK = kernelmatrix(kernel, x) + 1e-5I # The kernel matrix\nf = rand(rng, MvNormal(K)); # We draw a random sample from the GP prior\nnothing #hide","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"We add a prior mean on g so that the variance does not become too big","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"μ₀ = -3.0\ng = rand(rng, MvNormal(μ₀ * ones(N), K))\nλ = 3.0 # The maximum possible precision\nσ = inv.(sqrt.(λ * AGP.logistic.(g))) # We use the following transform to obtain the std. deviation\ny = f + σ .* randn(N); # We finally sample the ouput\nnothing #hide","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"We can visualize the data:","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"n_sig = 2 # Number of std. dev. around the mean\nplot(x, f; ribbon=n_sig * σ, lab=\"p(y|f,g)\") # Mean and std. dev. of y\nscatter!(x, y; alpha=0.5, msw=0.0, lab=\"y\") # Observation samples","category":"page"},{"location":"examples/heteroscedastic/#Model-creation-and-training","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Model creation and training","text":"","category":"section"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"We will now use the augmented model to infer both f and g","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"model = VGP(\n    x,\n    y,\n    kernel,\n    HeteroscedasticLikelihood(λ),\n    AnalyticVI();\n    optimiser=true, # We optimise both the mean parameters and kernel hyperparameters\n    mean=μ₀,\n    verbose=1,\n)","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"Model training, we train for around 100 iterations to wait for the convergence of the hyperparameters","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"train!(model, 100);\nnothing #hide","category":"page"},{"location":"examples/heteroscedastic/#Predictions","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Predictions","text":"","category":"section"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"We can now look at the predictions and compare them with out original model","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"(f_m, g_m), (f_σ, g_σ) = predict_f(model, x_test; cov=true)\ny_m, y_σ = proba_y(model, x_test);\nnothing #hide","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"Let's first look at the differece between the latent f and g","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"plot(x, [f, g]; label=[\"f\" \"g\"])\nplot!(\n    x_test,\n    [f_m, g_m];\n    ribbon=[n_sig * f_σ n_sig * g_σ],\n    lab=[\"f_pred\" \"g_pred\"],\n    legend=true,\n)","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"But it's more interesting to compare the predictive probability of y directly:","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"plot(x, f; ribbon=n_sig * σ, lab=\"p(y|f,g)\")\nplot!(x_test, y_m; ribbon=n_sig * sqrt.(y_σ), lab=\"p(y|f,g) pred\")\nscatter!(x, y; lab=\"y\", alpha=0.2)","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"Or to explore the heteroscedasticity itself, we can look at the residuals","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"scatter(x, (f - y) .^ 2; yaxis=:log, lab=\"residuals\", alpha=0.2)\nplot!(x, σ .^ 2; lab=\"true σ²(x)\")\nplot!(x_test, y_σ; lab=\"predicted σ²(x)\")","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"","category":"page"},{"location":"examples/heteroscedastic/","page":"Gaussian Process with Heteroscedastic likelihoods","title":"Gaussian Process with Heteroscedastic likelihoods","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"EditURL = \"https://github.com/theogf/AugmentedGaussianProcesses.jl/blob/master/docs/examples/sampling.jl\"","category":"page"},{"location":"examples/sampling/#Sampling-from-a-GP","page":"Sampling from a GP","title":"Sampling from a GP","text":"","category":"section"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"(Image: )","category":"page"},{"location":"examples/sampling/#Preliminary-steps","page":"Sampling from a GP","title":"Preliminary steps","text":"","category":"section"},{"location":"examples/sampling/#Loading-necessary-packages","page":"Sampling from a GP","title":"Loading necessary packages","text":"","category":"section"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"using Plots\nusing AugmentedGaussianProcesses\nusing Distributions\nusing LinearAlgebra","category":"page"},{"location":"examples/sampling/#Generating-some-random-data","page":"Sampling from a GP","title":"Generating some random data","text":"","category":"section"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"kernel = SqExponentialKernel()\nx = range(0, 10; length=50)\nK = kernelmatrix(kernel, x)\nf = rand(MvNormal(K + 1e-8I)) # Sample a random GP\ny = rand.(Bernoulli.(AGP.logistic.(f)))\ny_sign = Int.(sign.(y .- 0.5))","category":"page"},{"location":"examples/sampling/#We-create-a-function-to-visualize-the-data","page":"Sampling from a GP","title":"We create a function to visualize the data","text":"","category":"section"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"function plot_data(x, y; size=(300, 500))\n    return Plots.scatter(x, y; alpha=0.2, markerstrokewidth=0.0, lab=\"\", size=size)\nend\nplot_data(x, y; size=(500, 500))","category":"page"},{"location":"examples/sampling/#Run-the-variational-gaussian-process-approximation","page":"Sampling from a GP","title":"Run the variational gaussian process approximation","text":"","category":"section"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"@info \"Running full model\"\nmfull = VGP(x, y_sign, kernel, LogisticLikelihood(), AnalyticVI(); optimiser=false)\n@time train!(mfull, 5)","category":"page"},{"location":"examples/sampling/#We-can-also-create-a-sampling-based-model","page":"Sampling from a GP","title":"We can also create a sampling based model","text":"","category":"section"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"@info \"Sampling from model\"\nmmcmc = MCGP(x, y, kernel, LogisticLikelihood(), GibbsSampling(); optimiser=false)\nm = mmcmc\n@time samples = sample(mmcmc, 1000)","category":"page"},{"location":"examples/sampling/#We-can-now-visualize-the-results-of-both-models","page":"Sampling from a GP","title":"We can now visualize the results of both models","text":"","category":"section"},{"location":"examples/sampling/#We-first-plot-the-latent-function-f-(truth,-the-VI-estimate,-the-samples)","page":"Sampling from a GP","title":"We first plot the latent function f (truth, the VI estimate, the samples)","text":"","category":"section"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"p1 = plot(x, f; label=\"true f\")\nplot!(x, samples; label=\"\", color=:black, alpha=0.02, lab=\"\")\nplot!(x, mean(mfull[1]); ribbon=sqrt.(var(mfull[1])), label=\"VI\")","category":"page"},{"location":"examples/sampling/#And-we-can-also-plot-the-predictions-vs-the-data","page":"Sampling from a GP","title":"And we can also plot the predictions vs the data","text":"","category":"section"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"p2 = plot_data(x, y; size=(600, 400))\nμ_vi, σ_vi = proba_y(mfull, x)\nplot!(x, μ_vi; ribbon=σ_vi, label=\"VI\")\nμ_mcmc, σ_mcmc = proba_y(mmcmc, x)\nplot!(x, μ_mcmc; ribbon=σ_mcmc, label=\"MCMC\")","category":"page"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"","category":"page"},{"location":"examples/sampling/","page":"Sampling from a GP","title":"Sampling from a GP","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"EditURL = \"https://github.com/theogf/AugmentedGaussianProcesses.jl/blob/master/docs/examples/gpevents.jl\"","category":"page"},{"location":"examples/gpevents/#Gaussian-Process-for-Event-likelihoods","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"","category":"section"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"(Image: )","category":"page"},{"location":"examples/gpevents/#Preliminary-steps","page":"Gaussian Process for Event likelihoods","title":"Preliminary steps","text":"","category":"section"},{"location":"examples/gpevents/#Loading-necessary-packages","page":"Gaussian Process for Event likelihoods","title":"Loading necessary packages","text":"","category":"section"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"using Plots\nusing AugmentedGaussianProcesses\nusing Distributions","category":"page"},{"location":"examples/gpevents/#Creating-some-random-data","page":"Gaussian Process for Event likelihoods","title":"Creating some random data","text":"","category":"section"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"n_data = 200\nX = (rand(n_data) .- 0.5) * 40\nr = 5.0\nY = rand.(NegativeBinomial.(r, AGP.logistic.(sin.(X))))\nscatter(X, Y)","category":"page"},{"location":"examples/gpevents/#Run-GP-model-with-negative-binomial-likelihood-to-learn-p","page":"Gaussian Process for Event likelihoods","title":"Run GP model with negative binomial likelihood to learn p","text":"","category":"section"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)\nm_negbinomial = VGP(\n    X, Y, kernel, NegBinomialLikelihood(r), AnalyticVI(); optimiser=false, verbose=2\n)\n@time train!(m_negbinomial, 20)","category":"page"},{"location":"examples/gpevents/#Running-the-same-model-but-with-a-Poisson-likelihood","page":"Gaussian Process for Event likelihoods","title":"Running the same model but with a Poisson likelihood","text":"","category":"section"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)\nm_poisson = VGP(\n    X, Y, kernel, PoissonLikelihood(r), AnalyticVI(); optimiser=false, verbose=2\n)\n@time train!(m_poisson, 20)","category":"page"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"Prediction and plot function on a grid Create a grid and compute prediction on it","category":"page"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"function compute_grid(model, n_grid=50)\n    mins = -20\n    maxs = 20\n    x_grid = range(mins, maxs; length=n_grid) # Create a grid\n    y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1)) # Predict the mean and variance on the grid\n    return y_grid, sig_y_grid, x_grid\nend","category":"page"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"Plot the data as a scatter plot","category":"page"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"function plot_data(X, Y)\n    return Plots.scatter(X, Y; alpha=0.33, msw=0.0, lab=\"\", size=(800, 500))\nend\n\nfunction plot_model(model, X, Y, title=nothing)\n    n_grid = 100\n    y_grid, sig_y_grid, x_grid = compute_grid(model, n_grid)\n    p = plot_data(X, Y)\n    Plots.plot!(\n        p,\n        x_grid,\n        y_grid;\n        ribbon=2 * sqrt.(sig_y_grid), # Plot 2 std deviations\n        title=title,\n        color=\"red\",\n        lab=\"\",\n        linewidth=3.0,\n    )\n    return p\nend;\nnothing #hide","category":"page"},{"location":"examples/gpevents/#Comparison-between-the-two-likelihoods","page":"Gaussian Process for Event likelihoods","title":"Comparison between the two likelihoods","text":"","category":"section"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"Plots.plot(\n    plot_model.(\n        [m_negbinomial, m_poisson], Ref(X), Ref(Y), [\"Neg. Binomial\", \"Poisson\"]\n    )...;\n    layout=(1, 2),\n)","category":"page"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"","category":"page"},{"location":"examples/gpevents/","page":"Gaussian Process for Event likelihoods","title":"Gaussian Process for Event likelihoods","text":"This page was generated using Literate.jl.","category":"page"},{"location":"api/#API-Library","page":"API","title":"API Library","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"","category":"page"},{"location":"api/","page":"API","title":"API","text":"Pages = [\"api.md\"]","category":"page"},{"location":"api/","page":"API","title":"API","text":"CurrentModule = AugmentedGaussianProcesses","category":"page"},{"location":"api/#Module","page":"API","title":"Module","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"AugmentedGaussianProcesses","category":"page"},{"location":"api/#AugmentedGaussianProcesses.AugmentedGaussianProcesses","page":"API","title":"AugmentedGaussianProcesses.AugmentedGaussianProcesses","text":"General Framework for the data augmented Gaussian Processes\n\n\n\n\n\n","category":"module"},{"location":"api/#Model-Types","page":"API","title":"Model Types","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"GP\nVGP\nMCGP\nSVGP\nOnlineSVGP\nMOVGP\nMOSVGP\nVStP","category":"page"},{"location":"api/#AugmentedGaussianProcesses.GP","page":"API","title":"AugmentedGaussianProcesses.GP","text":"GP(args...; kwargs...)\n\nGaussian Process\n\nArguments\n\nX : input features, should be a matrix N×D where N is the number of observation and D the number of dimension\ny : input labels, can be either a vector of labels for multiclass and single output or a matrix for multi-outputs (note that only one likelihood can be applied)\nkernel : covariance function, can be either a single kernel or a collection of kernels for multiclass and multi-outputs models\n\nKeyword arguments\n\nnoise : Variance of the likelihood\nopt_noise : Flag for optimizing the variance by using the formul σ=Σ(y-f)^2/N\nmean : Option for putting a prior mean\nverbose : How much does the model print (0:nothing, 1:very basic, 2:medium, 3:everything)\noptimiser : Optimiser used for the kernel parameters. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is ADAM(0.001)\nIndependentPriors : Flag for setting independent or shared parameters among latent GPs\natfrequency : Choose how many variational parameters iterations are between hyperparameters optimization\nmean : PriorMean object, check the documentation on it MeanPrior\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.VGP","page":"API","title":"AugmentedGaussianProcesses.VGP","text":"VGP(args...; kwargs...)\n\nVariational Gaussian Process\n\nArguments\n\nX::AbstractArray : Input features, if X is a matrix the choice of colwise/rowwise is given by the obsdim keyword\ny::AbstractVector : Output labels\nkernel::Kernel : Covariance function, can be any kernel from KernelFunctions.jl\nlikelihood : Likelihood of the model. For compatibilities, see Likelihood Types\ninference : Inference for the model, see the Compatibility Table)\n\nKeyword arguments\n\nverbose : How much does the model print (0:nothing, 1:very basic, 2:medium, 3:everything)\noptimiser : Optimiser used for the kernel parameters. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is ADAM(0.001)\natfrequency::Int=1 : Choose how many variational parameters iterations are between hyperparameters optimization\nmean=ZeroMean() : PriorMean object, check the documentation on it MeanPrior\nobsdim::Int=1 : Dimension of the data. 1 : X ∈ DxN, 2: X ∈ NxD\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.MCGP","page":"API","title":"AugmentedGaussianProcesses.MCGP","text":"MCGP(args...; kwargs...)\n\nMonte-Carlo Gaussian Process\n\nArguments\n\nX::AbstractArray : Input features, if X is a matrix the choice of colwise/rowwise is given by the obsdim keyword\ny::AbstractVector : Output labels\nkernel::Kernel : Covariance function, can be any kernel from KernelFunctions.jl\nlikelihood : Likelihood of the model. For compatibilities, see Likelihood Types\ninference : Inference for the model, at the moment only GibbsSampling is available (see the Compatibility Table)\n\nKeyword arguments\n\nverbose::Int : How much does the model print (0:nothing, 1:very basic, 2:medium, 3:everything)\noptimiser : Optimiser used for the kernel parameters. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is ADAM(0.001)\natfrequency::Int=1 : Choose how many variational parameters iterations are between hyperparameters optimization\nmean=ZeroMean() : PriorMean object, check the documentation on it MeanPrior\nobsdim::Int=1 : Dimension of the data. 1 : X ∈ DxN, 2: X ∈ NxD \n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.SVGP","page":"API","title":"AugmentedGaussianProcesses.SVGP","text":"SVGP(args...; kwargs...)\n\nSparse Variational Gaussian Process\n\nArguments\n\nX::AbstractArray : Input features, if X is a matrix the choice of colwise/rowwise is given by the obsdim keyword\ny::AbstractVector : Output labels\nkernel::Kernel : Covariance function, can be any kernel from KernelFunctions.jl\nlikelihood : Likelihood of the model. For compatibilities, see Likelihood Types\ninference : Inference for the model, see the Compatibility Table)\nnInducingPoints/Z : number of inducing points, or AbstractVector object\n\nKeyword arguments\n\nverbose : How much does the model print (0:nothing, 1:very basic, 2:medium, 3:everything)\noptimiser : Optimiser used for the kernel parameters. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is ADAM(0.001)\natfrequency::Int=1 : Choose how many variational parameters iterations are between hyperparameters optimization\nmean=ZeroMean() : PriorMean object, check the documentation on it MeanPrior\nZoptimiser : Optimiser for inducing points locations\nobsdim::Int=1 : Dimension of the data. 1 : X ∈ DxN, 2: X ∈ NxD\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.OnlineSVGP","page":"API","title":"AugmentedGaussianProcesses.OnlineSVGP","text":"OnlineSVGP(args...; kwargs...)\n\nOnline Sparse Variational Gaussian Process\n\nArguments\n\nkernel::Kernel : Covariance function, can be any kernel from KernelFunctions.jl\nlikelihood : Likelihood of the model. For compatibilities, see Likelihood Types\ninference : Inference for the model, see the Compatibility Table)\nZalg : Algorithm selecting how inducing points are selected\n\nKeywords arguments\n\nverbose : How much does the model print (0:nothing, 1:very basic, 2:medium, 3:everything)\noptimiser : Optimiser used for the kernel parameters. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is ADAM(0.001)\natfrequency::Int=1 : Choose how many variational parameters iterations are between hyperparameters optimization\nmean=ZeroMean() : PriorMean object, check the documentation on it MeanPrior\nZoptimiser : Optimiser for inducing points locations\nT::DataType=Float64 : Hint for what the type of the data is going to be.\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.MOVGP","page":"API","title":"AugmentedGaussianProcesses.MOVGP","text":"MOVGP(args...; kwargs...)\n\nMulti-Output Variational Gaussian Process\n\nArguments\n\nX::AbstractVector : : Input features, if X is a matrix the choice of colwise/rowwise is given by the obsdim keyword\ny::AbstractVector{<:AbstractVector} : Output labels, each vector corresponds to one output dimension\nkernel::Union{Kernel,AbstractVector{<:Kernel} : covariance function or vector of covariance functions, can be either a single kernel or a collection of kernels for multiclass and multi-outputs models\nlikelihood::Union{AbstractLikelihood,Vector{<:Likelihood} : Likelihood or vector of likelihoods of the model. For compatibilities, see Likelihood Types\ninference : Inference for the model, for compatibilities see the Compatibility Table)\nnum_latent::Int : Number of latent GPs\n\nKeyword arguments\n\nverbose::Int : How much does the model print (0:nothing, 1:very basic, 2:medium, 3:everything)\noptimiser : Optimiser used for the kernel parameters. Should be an Optimiser object from the Optimisers.jl library. Default is ADAM(0.001)\nAoptimiser : Optimiser used for the mixing parameters.\natfrequency::Int=1 : Choose how many variational parameters iterations are between hyperparameters optimization\nmean=ZeroMean() : PriorMean object, check the documentation on it MeanPrior\nobsdim::Int=1 : Dimension of the data. 1 : X ∈ DxN, 2: X ∈ NxD\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.MOSVGP","page":"API","title":"AugmentedGaussianProcesses.MOSVGP","text":"MOSVGP(args...; kwargs...)\n\nMulti-Output Sparse Variational Gaussian Process\n\nArguments\n\nkernel::Union{Kernel,AbstractVector{<:Kernel} : covariance function or vector of covariance functions, can be either a single kernel or a collection of kernels for multiclass and multi-outputs models\nlikelihoods::Union{AbstractLikelihood,Vector{<:Likelihood} : Likelihood or vector of likelihoods of the model. For compatibilities, see Likelihood Types\ninference : Inference for the model, for compatibilities see the Compatibility Table)\nnLatent::Int : Number of latent GPs\nnInducingPoints : number of inducing points, or collection of inducing points locations\n\nKeyword arguments\n\nverbose::Int : How much does the model print (0:nothing, 1:very basic, 2:medium, 3:everything)\noptimiser : Optimiser used for the kernel parameters. Should be an Optimiser object from the Optimisers.jl library. Default is ADAM(0.001)\nZoptimiser : Optimiser used for the inducing points locations\nAoptimiser : Optimiser used for the mixing parameters.\natfrequency::Int=1 : Choose how many variational parameters iterations are between hyperparameters optimization\nmean=ZeroMean() : PriorMean object, check the documentation on it MeanPrior\nobsdim::Int=1 : Dimension of the data. 1 : X ∈ DxN, 2: X ∈ NxD\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.VStP","page":"API","title":"AugmentedGaussianProcesses.VStP","text":"VStP(args...; kwargs...)\n\nVariational Student-T Process\n\nArguments\n\nX::AbstractArray : Input features, if X is a matrix the choice of colwise/rowwise is given by the obsdim keyword\ny::AbstractVector : Output labels\nkernel::Kernel : Covariance function, can be any kernel from KernelFunctions.jl\nlikelihood : Likelihood of the model. For compatibilities, see Likelihood Types\ninference : Inference for the model, see the Compatibility Table)\nν::Real : Number of degrees of freedom \n\nKeyword arguments\n\nverbose : How much does the model print (0:nothing, 1:very basic, 2:medium, 3:everything)\noptimiser : Optimiser used for the kernel parameters. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is ADAM(0.001)\natfrequency::Int=1 : Choose how many variational parameters iterations are between hyperparameters optimization\nmean=ZeroMean() : PriorMean object, check the documentation on it MeanPrior\nobsdim::Int=1 : Dimension of the data. 1 : X ∈ DxN, 2: X ∈ NxD\n\n\n\n\n\n","category":"type"},{"location":"api/#Likelihood-Types","page":"API","title":"Likelihood Types","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"GaussianLikelihood\nStudentTLikelihood\nLaplaceLikelihood\nLogisticLikelihood\nHeteroscedasticLikelihood\nBayesianSVM\nSoftMaxLikelihood\nLogisticSoftMaxLikelihood\nPoissonLikelihood\nNegBinomialLikelihood","category":"page"},{"location":"api/#AugmentedGaussianProcesses.GaussianLikelihood","page":"API","title":"AugmentedGaussianProcesses.GaussianLikelihood","text":"GaussianLikelihood(σ²::T=1e-3) # σ² is the variance of the noise\n\nGaussian noise :\n\n    p(yf) = N(yfsigma^2)\n\nThere is no augmentation needed for this likelihood which is already conjugate to a Gaussian prior.\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.StudentTLikelihood","page":"API","title":"AugmentedGaussianProcesses.StudentTLikelihood","text":"StudentTLikelihood(ν::T, σ::Real=one(T))\n\nArguments\n\nν::Real : degrees of freedom of the student-T\nσ::Real : standard deviation of the local scale \n\nStudent-t likelihood for regression:\n\n    p(yfνσ) = fracΓ(fracν+12)sqrt(νπ) σ Γ(fracν2) (1+frac(y-f)^2σ^2ν)^(-fracν+12)\n\nwhere ν is the number of degrees of freedom and σ is the standard deviation for local scale of the data.\n\n\n\nFor the augmented analytical solution, it is augmented via:\n\n    p(yfomega) = N(yfsigma^2 omega)\n\nWhere omega sim mathcalIG(fracnu2fracnu2) where mathcalIG is the inverse-gamma distribution. See paper Robust Gaussian Process Regression with a Student-t Likelihood\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.LaplaceLikelihood","page":"API","title":"AugmentedGaussianProcesses.LaplaceLikelihood","text":"LaplaceLikelihood(β::T=1.0)  #  Laplace likelihood with scale β\n\nLaplace likelihood for regression:\n\nfrac12beta exp(-fracy-fβ)\n\nsee wiki page\n\nFor the analytical solution, it is augmented via:\n\np(yfω) = N(yfω¹)\n\nwhere ω sim textExp(ω  1(2 β^2)), and textExp is the Exponential distribution We use the variational distribution q(ω) = GIG(ωabp)\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.LogisticLikelihood","page":"API","title":"AugmentedGaussianProcesses.LogisticLikelihood","text":"LogisticLikelihood() -> BernoulliLikelihood\n\nBernoulli likelihood with a logistic link for the Bernoulli likelihood\n\n    p(yf) = sigma(yf) = frac11 + exp(-yf)\n\n(for more info see : wiki page)\n\n\n\nFor the analytic version the likelihood, it is augmented via:\n\n    p(yfω) = expleft(frac12(yf - (yf)^2 omega)right)\n\nwhere ω sim mathcalPG(omega  1 0), and mathcalPG is the Polya-Gamma distribution. See paper : Efficient Gaussian Process Classification Using Polya-Gamma Data Augmentation.\n\n\n\n\n\n","category":"function"},{"location":"api/#AugmentedGaussianProcesses.HeteroscedasticLikelihood","page":"API","title":"AugmentedGaussianProcesses.HeteroscedasticLikelihood","text":"HeteroscedasticLikelihood(λ::T=1.0)->HeteroscedasticGaussianLikelihood\n\nArguments\n\nλ::Real : The maximum precision possible (this is optimized during training)\n\n\n\nGaussian with heteroscedastic noise given by another gp:\n\n    p(yfg) = mathcalN(yf(lambda sigma(g))^-1)\n\nWhere sigma is the logistic function\n\nThe augmentation is not trivial and will be described in a future paper\n\n\n\n\n\n","category":"function"},{"location":"api/#AugmentedGaussianProcesses.BayesianSVM","page":"API","title":"AugmentedGaussianProcesses.BayesianSVM","text":"BayesianSVM() -> BernoulliLikelihood\n\nThe Bayesian SVM is a Bayesian interpretation of the classical SVM.\n\np(yf) propto exp(2 max(1-yf 0))\n\n\n\nFor the analytic version of the likelihood, it is augmented via:\n\np(yf ω) = frac1sqrt(2piomega) expleft(-frac(1+omega-yf)^22omega)right)\n\nwhere ω sim 10infty) has an improper prior (his posterior is however has a valid distribution, a Generalized Inverse Gaussian). For reference see this paper.\n\n\n\n\n\n","category":"function"},{"location":"api/#AugmentedGaussianProcesses.SoftMaxLikelihood","page":"API","title":"AugmentedGaussianProcesses.SoftMaxLikelihood","text":"SoftMaxLikelihood(num_class::Int) -> MultiClassLikelihood\n\nArguments\n\nnum_class::Int : Total number of classes\nSoftMaxLikelihood(labels::AbstractVector) -> MultiClassLikelihood\n\nArguments\n\nlabels::AbstractVector : List of classes labels\n\nMulticlass likelihood with Softmax transformation:\n\np(y=if_k_k=1^K) = fracexp(f_i)sum_k=1^Kexp(f_k)\n\nThere is no possible augmentation for this likelihood\n\n\n\n\n\n","category":"function"},{"location":"api/#AugmentedGaussianProcesses.LogisticSoftMaxLikelihood","page":"API","title":"AugmentedGaussianProcesses.LogisticSoftMaxLikelihood","text":"LogisticSoftMaxLikelihood(num_class::Int) -> MultiClassLikelihood\n\nArguments\n\nnum_class::Int : Total number of classes\nLogisticSoftMaxLikelihood(labels::AbstractVector) -> MultiClassLikelihood\n\nArguments\n\nlabels::AbstractVector : List of classes labels\n\n\n\nThe multiclass likelihood with a logistic-softmax mapping: :\n\np(y=if_k_1^K) = fracsigma(f_i)sum_k=1^k sigma(f_k)\n\nwhere sigma is the logistic function. This likelihood has the same properties as softmax. –-\n\nFor the analytical version, the likelihood is augmented multiple times. More details can be found in the paper Multi-Class Gaussian Process Classification Made Conjugate: Efficient Inference via Data Augmentation.\n\n\n\n\n\n","category":"function"},{"location":"api/#GPLikelihoods.PoissonLikelihood","page":"API","title":"GPLikelihoods.PoissonLikelihood","text":"PoissonLikelihood(λ::Real)->PoissonLikelihood\n\nArguments\n\nλ::Real : Maximal Poisson rate\n\n\n\nPoisson Likelihood where a Poisson distribution is defined at every point in space (careful, it's different from continous Poisson processes).\n\n    p(yf) = textPoisson(ylambda sigma(f))\n\nWhere sigma is the logistic function. Augmentation details will be released at some point (open an issue if you want to see them)\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.NegBinomialLikelihood","page":"API","title":"AugmentedGaussianProcesses.NegBinomialLikelihood","text":"NegBinomialLikelihood(r::Real)\n\nArguments\n\nr::Real number of failures until the experiment is stopped\n\n\n\nNegative Binomial likelihood with number of failures r\n\n    p(yr f) = y + r - 1 choose y (1 - sigma(f))^r sigma(f)^y\n\nif rin mathbbN or\n\n    p(yr f) = fracGamma(y + r)Gamma(y + 1)Gamma(r) (1 - sigma(f))^r sigma(f)^y\n\nif rinmathbbR. Where sigma is the logistic function\n\n\n\n\n\n","category":"type"},{"location":"api/#Inference-Types","page":"API","title":"Inference Types","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"AnalyticVI\nAnalyticSVI\nGibbsSampling\nQuadratureVI\nQuadratureSVI\nMCIntegrationVI\nMCIntegrationSVI","category":"page"},{"location":"api/#AugmentedGaussianProcesses.AnalyticVI","page":"API","title":"AugmentedGaussianProcesses.AnalyticVI","text":"AnalyticVI(;ϵ::T=1e-5)\n\nVariational Inference solver for conjugate or conditionally conjugate likelihoods (non-gaussian are made conjugate via augmentation) All data is used at each iteration (use AnalyticSVI for updates using minibatches)\n\nKeywords arguments\n\nϵ::Real : convergence criteria\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.AnalyticSVI","page":"API","title":"AugmentedGaussianProcesses.AnalyticSVI","text":"AnalyticSVI(nMinibatch::Int; ϵ::T=1e-5, optimiser=RobbinsMonro())\n\nStochastic Variational Inference solver for conjugate or conditionally conjugate likelihoods (non-gaussian are made conjugate via augmentation). See AnalyticVI for reference\n\nArguments\n\nnMinibatch::Integer : Number of samples per mini-batches\n\nKeywords arguments\n\nϵ::T : convergence criteria\noptimiser : Optimiser used for the variational updates. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is RobbinsMonro() (ρ=(τ+iter)^-κ)\n\n\n\n\n\n","category":"function"},{"location":"api/#AugmentedGaussianProcesses.GibbsSampling","page":"API","title":"AugmentedGaussianProcesses.GibbsSampling","text":"GibbsSampling(;ϵ::T=1e-5, nBurnin::Int=100, thinning::Int=1)\n\nDraw samples from the true posterior via Gibbs Sampling.\n\nKeywords arguments\n\nϵ::T : convergence criteria\nnBurnin::Int : Number of samples discarded before starting to save samples\nthinning::Int : Frequency at which samples are saved \n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.QuadratureVI","page":"API","title":"AugmentedGaussianProcesses.QuadratureVI","text":"QuadratureVI(;ϵ::T=1e-5, nGaussHermite::Integer=20, clipping=Inf, natural::Bool=true, optimiser=Momentum(0.0001))\n\nVariational Inference solver by approximating gradients via numerical integration via Quadrature\n\nKeyword arguments\n\nϵ::T : convergence criteria\nnGaussHermite::Int : Number of points for the integral estimation\nclipping::Real : Limit the gradients values to avoid overshooting\nnatural::Bool : Use natural gradients\noptimiser : Optimiser used for the variational updates. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is Momentum(0.0001)\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.QuadratureSVI","page":"API","title":"AugmentedGaussianProcesses.QuadratureSVI","text":"QuadratureSVI(nMinibatch::Int; ϵ::T=1e-5, nGaussHermite::Int=20, clipping=Inf, natural=true, optimiser=Momentum(0.0001))\n\nStochastic Variational Inference solver by approximating gradients via numerical integration via Gaussian Quadrature. See QuadratureVI for a more detailed reference.\n\nArguments\n\n-nMinibatch::Integer : Number of samples per mini-batches\n\nKeyword arguments\n\nϵ::T : convergence criteria, which can be user defined\nnGaussHermite::Int : Number of points for the integral estimation (for the QuadratureVI)\nnatural::Bool : Use natural gradients\noptimiser : Optimiser used for the variational updates. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is Momentum(0.0001)\n\n\n\n\n\n","category":"function"},{"location":"api/#AugmentedGaussianProcesses.MCIntegrationVI","page":"API","title":"AugmentedGaussianProcesses.MCIntegrationVI","text":"MCIntegrationVI(;ϵ::T=1e-5, nMC::Integer=1000, clipping::Real=Inf, natural::Bool=true, optimiser=Momentum(0.001))\n\nVariational Inference solver by approximating gradients via MC Integration. It means the expectation E[log p(y|f)] as well as its gradients is computed by sampling from q(f).\n\nKeyword arguments\n\nϵ::Real : convergence criteria, which can be user defined\nnMC::Int : Number of samples per data point for the integral evaluation\nclipping::Real : Limit the gradients values to avoid overshooting\nnatural::Bool : Use natural gradients\noptimiser : Optimiser used for the variational updates. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is Momentum(0.01)\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.MCIntegrationSVI","page":"API","title":"AugmentedGaussianProcesses.MCIntegrationSVI","text":"MCIntegrationSVI(batchsize::Int; ϵ::Real=1e-5, nMC::Integer=1000, clipping=Inf, natural=true, optimiser=Momentum(0.0001))\n\nStochastic Variational Inference solver by approximating gradients via Monte Carlo integration when using minibatches See MCIntegrationVI for more explanations.\n\nArgument\n\n-batchsize::Integer : Number of samples per mini-batches\n\nKeyword arguments\n\nϵ::T : convergence criteria, which can be user defined\nnMC::Int : Number of samples per data point for the integral evaluation\nclipping::Real : Limit the gradients values to avoid overshooting\nnatural::Bool : Use natural gradients\noptimiser : Optimiser used for the variational updates. Should be an Optimiser object from the Flux.jl library, see list here Optimisers and on this list. Default is Momentum() (ρ=(τ+iter)^-κ)\n\n\n\n\n\n","category":"function"},{"location":"api/#Functions-and-methods","page":"API","title":"Functions and methods","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"train!\nsample\npredict_f\npredict_y\nproba_y","category":"page"},{"location":"api/#AugmentedGaussianProcesses.train!","page":"API","title":"AugmentedGaussianProcesses.train!","text":"train!(model::AbstractGPModel; iterations::Integer=100, callback, convergence)\n\nFunction to train the given GP model.\n\nArguments\n\nmodel : AbstractGPModel model with either an Analytic, AnalyticVI or NumericalVI type of inference\n\nKeyword Arguments\n\niterations::Int : Number of iterations (not necessarily epochs!)for training\ncallback::Function=nothing : Callback function called at every iteration. Should be of type function(model,iter) ...  end\nconvergence::Function=nothing : Convergence function to be called every iteration, should return a scalar and take the same arguments as callback\n\n\n\n\n\ntrain!(model::AbstractGPModel, X::AbstractMatrix, y::AbstractArray; obsdim = 1, iterations::Int=10,callback=nothing,conv=0)\ntrain!(model::AbstractGPModel, X::AbstractVector, y::AbstractArray; iterations::Int=20,callback=nothing,conv=0)\n\nFunction to train the given GP model.\n\nKeyword Arguments\n\nthere are options to change the number of max iterations,\n\niterations::Int : Number of iterations (not necessarily epochs!)for training\ncallback::Function : Callback function called at every iteration. Should be of type function(model,iter) ...  end\nconv::Function : Convergence function to be called every iteration, should return a scalar and take the same arguments as callback\n\n\n\n\n\n","category":"function"},{"location":"api/#AugmentedGaussianProcesses.predict_f","page":"API","title":"AugmentedGaussianProcesses.predict_f","text":"predict_f(m::AbstractGPModel, X_test, cov::Bool=true, diag::Bool=true)\n\nCompute the mean of the predicted latent distribution of f on X_test for the variational GP model\n\nReturn also the diagonal variance if cov=true and the full covariance if diag=false\n\n\n\n\n\n","category":"function"},{"location":"api/#AugmentedGaussianProcesses.predict_y","page":"API","title":"AugmentedGaussianProcesses.predict_y","text":"predict_y(model::AbstractGPModel, X_test::AbstractVector)\npredict_y(model::AbstractGPModel, X_test::AbstractMatrix; obsdim = 1)\n\nReturn     - the predictive mean of X_test for regression     - 0 or 1 of X_test for classification     - the most likely class for multi-class classification     - the expected number of events for an event likelihood\n\n\n\n\n\n","category":"function"},{"location":"api/#AugmentedGaussianProcesses.proba_y","page":"API","title":"AugmentedGaussianProcesses.proba_y","text":"proba_y(model::AbstractGPModel, X_test::AbstractVector)\nproba_y(model::AbstractGPModel, X_test::AbstractMatrix; obsdim = 1)\n\nReturn the probability distribution p(ytest|model,Xtest) :\n\n- `Tuple{Vector,Vector}` of mean and variance for regression\n- `Vector{<:Real}` of probabilities of y_test = 1 for binary classification\n- `NTuple{K,<:AbstractVector}`, with element being a vector of probability for one class for multi-class classification\n\n\n\n\n\n","category":"function"},{"location":"api/#Prior-Means","page":"API","title":"Prior Means","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"ZeroMean\nConstantMean\nEmpiricalMean\nAffineMean","category":"page"},{"location":"api/#AugmentedGaussianProcesses.ZeroMean","page":"API","title":"AugmentedGaussianProcesses.ZeroMean","text":"ZeroMean()\n\nConstruct a mean prior set to 0 and which cannot be updated.\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.ConstantMean","page":"API","title":"AugmentedGaussianProcesses.ConstantMean","text":"ConstantMean(c::Real = 1.0; opt=ADAM(0.01))\n\nArguments\n\nc::Real : Constant value\n\nConstruct a prior mean with constant c Optionally set an optimiser opt (ADAM(0.01) by default)\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.EmpiricalMean","page":"API","title":"AugmentedGaussianProcesses.EmpiricalMean","text":"EmpiricalMean(c::AbstractVector{<:Real}=1.0;opt=ADAM(0.01))\n\nArguments\n\nc::AbstractVector : Empirical mean vector\n\nConstruct a empirical mean with values c Optionally give an optimiser opt (ADAM(0.01) by default)\n\n\n\n\n\n","category":"type"},{"location":"api/#AugmentedGaussianProcesses.AffineMean","page":"API","title":"AugmentedGaussianProcesses.AffineMean","text":"AffineMean(w::Vector, b::Real; opt = ADAM(0.01))\nAffineMean(dims::Int; opt=ADAM(0.01))\n\nArguments\n\nw::Vector : Weight vector\nb::Real : Bias\ndims::Int : Number of features per vector\n\nConstruct an affine operation on X : μ₀(X) = X * w + b where w is a vector and b a scalar Optionally give an optimiser opt (Adam(α=0.01) by default)\n\n\n\n\n\n","category":"type"},{"location":"api/#Index","page":"API","title":"Index","text":"","category":"section"},{"location":"api/","page":"API","title":"API","text":"Pages = [\"api.md\"]\nModule = [\"AugmentedGaussianProcesses\"]\nOrder = [:type, :function]","category":"page"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"EditURL = \"https://github.com/theogf/AugmentedGaussianProcesses.jl/blob/master/docs/examples/gpclassification.jl\"","category":"page"},{"location":"examples/gpclassification/#Gaussian-Process-Classification","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"(Image: )","category":"page"},{"location":"examples/gpclassification/#Preliminary-steps","page":"Gaussian Process Classification","title":"Preliminary steps","text":"","category":"section"},{"location":"examples/gpclassification/#Loading-necessary-packages","page":"Gaussian Process Classification","title":"Loading necessary packages","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"using Plots\nusing HTTP, CSV\nusing DataFrames: DataFrame\nusing AugmentedGaussianProcesses\nusing MLDataUtils","category":"page"},{"location":"examples/gpclassification/#Loading-the-banana-dataset-from-OpenML","page":"Gaussian Process Classification","title":"Loading the banana dataset from OpenML","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"data = HTTP.get(\"https://www.openml.org/data/get_csv/1586217/phpwRjVjk\")\ndata = CSV.read(data.body, DataFrame)\ndata.Class[data.Class .== 2] .= -1\ndata = Matrix(data)\nX = data[:, 1:2]\nY = Int.(data[:, end]);\n(X_train, y_train), (X_test, y_test) = splitobs((X, Y), 0.5, ObsDim.First())","category":"page"},{"location":"examples/gpclassification/#We-create-a-function-to-visualize-the-data","page":"Gaussian Process Classification","title":"We create a function to visualize the data","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"function plot_data(X, Y; size=(300, 500))\n    return Plots.scatter(\n        eachcol(X)...; group=Y, alpha=0.2, markerstrokewidth=0.0, lab=\"\", size=size\n    )\nend\nplot_data(X, Y; size=(500, 500))","category":"page"},{"location":"examples/gpclassification/#Run-sparse-classification-with-increasing-number-of-inducing-points","page":"Gaussian Process Classification","title":"Run sparse classification with increasing number of inducing points","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"Ms = [4, 8, 16, 32, 64]\nmodels = Vector{AbstractGPModel}(undef, length(Ms) + 1)\nkernel = SqExponentialKernel() ∘ ScaleTransform(1.0)\nfor (i, num_inducing) in enumerate(Ms)\n    @info \"Training with $(num_inducing) points\"\n    m = SVGP(\n        kernel,\n        LogisticLikelihood(),\n        AnalyticVI(),\n        inducingpoints(KmeansAlg(num_inducing), X);\n        optimiser=false,\n        Zoptimiser=false,\n    )\n    @time train!(m, X_train, y_train, 20)\n    models[i] = m\nend","category":"page"},{"location":"examples/gpclassification/#Running-the-full-model","page":"Gaussian Process Classification","title":"Running the full model","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"@info \"Running full model\"\nmfull = VGP(X_train, y_train, kernel, LogisticLikelihood(), AnalyticVI(); optimiser=false)\n@time train!(mfull, 5)\nmodels[end] = mfull","category":"page"},{"location":"examples/gpclassification/#We-create-a-prediction-and-plot-function-on-a-grid","page":"Gaussian Process Classification","title":"We create a prediction and plot function on a grid","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"function compute_grid(model, n_grid=50)\n    mins = [-3.25, -2.85]\n    maxs = [3.65, 3.4]\n    x_lin = range(mins[1], maxs[1]; length=n_grid)\n    y_lin = range(mins[2], maxs[2]; length=n_grid)\n    x_grid = Iterators.product(x_lin, y_lin)\n    y_grid, _ = proba_y(model, vec(collect.(x_grid)))\n    return y_grid, x_lin, y_lin\nend\n\nfunction plot_model(model, X, Y, title=nothing; size=(300, 500))\n    n_grid = 50\n    y_pred, x_lin, y_lin = compute_grid(model, n_grid)\n    title = if isnothing(title)\n        (model isa SVGP ? \"M = $(AGP.dim(model[1]))\" : \"full\")\n    else\n        title\n    end\n    p = plot_data(X, Y; size=size)\n    Plots.contour!(\n        p,\n        x_lin,\n        y_lin,\n        reshape(y_pred, n_grid, n_grid)';\n        cbar=false,\n        levels=[0.5],\n        fill=false,\n        color=:black,\n        linewidth=2.0,\n        title=title,\n    )\n    if model isa SVGP\n        Plots.scatter!(\n            p, eachrow(hcat(AGP.Zview(model[1])...))...; msize=2.0, color=\"black\", lab=\"\"\n        )\n    end\n    return p\nend;\nnothing #hide","category":"page"},{"location":"examples/gpclassification/#Now-run-the-prediction-for-every-model-and-visualize-the-differences","page":"Gaussian Process Classification","title":"Now run the prediction for every model and visualize the differences","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"Plots.plot(\n    plot_model.(models, Ref(X), Ref(Y))...; layout=(1, length(models)), size=(1000, 200)\n)","category":"page"},{"location":"examples/gpclassification/#Bayesian-SVM-vs-Logistic","page":"Gaussian Process Classification","title":"Bayesian SVM vs Logistic","text":"","category":"section"},{"location":"examples/gpclassification/#We-now-create-a-model-with-the-Bayesian-SVM-likelihood","page":"Gaussian Process Classification","title":"We now create a model with the Bayesian SVM likelihood","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"mbsvm = VGP(X_train, y_train, kernel, BayesianSVM(), AnalyticVI(); optimiser=false)\n@time train!(mbsvm, 5)","category":"page"},{"location":"examples/gpclassification/#And-compare-it-with-the-Logistic-likelihood","page":"Gaussian Process Classification","title":"And compare it with the Logistic likelihood","text":"","category":"section"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"Plots.plot(\n    plot_model.(\n        [models[end], mbsvm], Ref(X), Ref(Y), [\"Logistic\", \"BSVM\"]; size=(500, 250)\n    )...;\n    layout=(1, 2),\n)","category":"page"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"","category":"page"},{"location":"examples/gpclassification/","page":"Gaussian Process Classification","title":"Gaussian Process Classification","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"EditURL = \"https://github.com/theogf/AugmentedGaussianProcesses.jl/blob/master/docs/examples/gpregression.jl\"","category":"page"},{"location":"examples/gpregression/#Gaussian-Process-Regression-(for-large-data)","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"","category":"section"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"(Image: )","category":"page"},{"location":"examples/gpregression/#Loading-necessary-packages","page":"Gaussian Process Regression (for large data)","title":"Loading necessary packages","text":"","category":"section"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"using AugmentedGaussianProcesses\nusing Distributions\nusing Plots","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"We create a toy dataset with X ∈ [-20, 20] and y = 5 * sinc(X)","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"N = 1000\nX = reshape((sort(rand(N)) .- 0.5) * 40.0, N, 1)\nσ = 0.01\n\nfunction latent(x)\n    return 5.0 * sinc.(x)\nend\nY = vec(latent(X) + σ * randn(N));\nnothing #hide","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"Visualization of the data :","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"scatter(X, Y; lab=\"\")","category":"page"},{"location":"examples/gpregression/#Gaussian-noise","page":"Gaussian Process Regression (for large data)","title":"Gaussian noise","text":"","category":"section"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"In this first example we are going to look at the effect of using inducing points compared to the true Gaussian Process For simplicity we will keep all inducing points and kernel parameters fixed","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"Ms = [4, 8, 16, 32, 64];\nnothing #hide","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"Create an empty array of GPs","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"models = Vector{AbstractGPModel}(undef, length(Ms) + 1);\nnothing #hide","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"Chose a kernel","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"kernel = SqExponentialKernel();#  + PeriodicKernel()\nnothing #hide","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"And Run sparse classification with an increasing number of inducing points","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"for (index, num_inducing) in enumerate(Ms)\n    @info \"Training with $(num_inducing) points\"\n    m = SVGP(\n        kernel, # Kernel\n        GaussianLikelihood(σ), # Likelihood used\n        AnalyticVI(), # Inference usede to solve the problem\n        inducingpoints(KmeansAlg(num_inducing), X); # Inducing points initialized with kmeans\n        optimiser=false, # Keep kernel parameters fixed\n        Zoptimiser=false, # Keep inducing points locations fixed\n    )\n    @time train!(m, X, Y, 100) # Train the model for 100 iterations\n    models[index] = m # Save the model in the array\nend","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"Train the model without any inducing points (no approximation)","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"@info \"Training with full model\"\nmfull = GP(\n    X,\n    Y,\n    kernel;\n    noise=σ,\n    opt_noise=false, # Keep the noise value fixed\n    optimiser=false, # Keep kernel parameters fixed\n)\n@time train!(mfull, 5);\nmodels[end] = mfull;\nnothing #hide","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"Create a grid and compute prediction on it","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"function compute_grid(model, n_grid=50)\n    mins = -20\n    maxs = 20\n    x_grid = range(mins, maxs; length=n_grid) # Create a grid\n    y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1)) # Predict the mean and variance on the grid\n    return y_grid, sig_y_grid, x_grid\nend;\nnothing #hide","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"Plot the data as a scatter plot","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"function plotdata(X, Y)\n    return Plots.scatter(X, Y; alpha=0.33, msw=0.0, lab=\"\", size=(300, 500))\nend\n\nfunction plot_model(model, X, Y, title=nothing)\n    n_grid = 100\n    y_grid, sig_y_grid, x_grid = compute_grid(model, n_grid)\n    title = if isnothing(title)\n        (model isa SVGP ? \"M = $(dim(model[1]))\" : \"full\")\n    else\n        title\n    end\n\n    p = plotdata(X, Y)\n    Plots.plot!(\n        p,\n        x_grid,\n        y_grid;\n        ribbon=2 * sqrt.(sig_y_grid), # Plot 2 std deviations\n        title=title,\n        color=\"red\",\n        lab=\"\",\n        linewidth=3.0,\n    )\n    if model isa SVGP # Plot the inducing points as well\n        Plots.plot!(\n            p,\n            vec(model.f[1].Z),\n            zeros(dim(model.f[1]));\n            msize=2.0,\n            color=\"black\",\n            t=:scatter,\n            lab=\"\",\n        )\n    end\n    return p\nend;\n\nPlots.plot(\n    plot_model.(models, Ref(X), Ref(Y))...; layout=(1, length(models)), size=(1000, 200)\n) # Plot all models and combine the plots","category":"page"},{"location":"examples/gpregression/#Non-Gaussian-Likelihoods","page":"Gaussian Process Regression (for large data)","title":"Non-Gaussian Likelihoods","text":"","category":"section"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"We now look at using another noise than Gaussian noise. In AGP.jl you can use the Student-T likelihood, the Laplace likelihood and the Heteroscedastic likelihood","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"We will use the same toy dataset for our experiment","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"Create an array of model with different likelihoods:","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"likelihoods = [\n    StudentTLikelihood(3.0), LaplaceLikelihood(3.0), HeteroscedasticLikelihood(1.0)\n]\nngmodels = Vector{AbstractGPModel}(undef, length(likelihoods) + 1)\nfor (i, l) in enumerate(likelihoods)\n    @info \"Training with the $(l)\" # We need to use VGP\n    m = VGP(\n        X,\n        Y, # First arguments are the input and output\n        kernel, # Kernel\n        l, # Likelihood used\n        AnalyticVI(); # Inference usede to solve the problem\n        optimiser=false, # Keep kernel parameters fixed\n    )\n    @time train!(m, 10) # Train the model for 100 iterations\n    ngmodels[i] = m # Save the model in the array\nend\n\nngmodels[end] = models[end] # Add the Gaussian model","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"We can now repeat the prediction from before :","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"Plots.plot(\n    plot_model.(\n        ngmodels, Ref(X), Ref(Y), [\"Student-T\", \"Laplace\", \"Heteroscedastic\", \"Gaussian\"]\n    )...;\n    layout=(2, 2),\n    size=(1000, 200),\n) # Plot all models and combine the plots","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"","category":"page"},{"location":"examples/gpregression/","page":"Gaussian Process Regression (for large data)","title":"Gaussian Process Regression (for large data)","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"EditURL = \"https://github.com/theogf/AugmentedGaussianProcesses.jl/blob/master/docs/examples/onlinegp.jl\"","category":"page"},{"location":"examples/onlinegp/#Online-Gaussian-Process","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"","category":"section"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"(Image: )","category":"page"},{"location":"examples/onlinegp/#Loading-necessary-packages","page":"Online Gaussian Process","title":"Loading necessary packages","text":"","category":"section"},{"location":"examples/onlinegp/#Preliminary-steps","page":"Online Gaussian Process","title":"Preliminary steps","text":"","category":"section"},{"location":"examples/onlinegp/#Load-the-necessary-packages","page":"Online Gaussian Process","title":"Load the necessary packages","text":"","category":"section"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"using Plots\nusing AugmentedGaussianProcesses\nusing MLDataUtils, Distributions","category":"page"},{"location":"examples/onlinegp/#We-create-a-toy-dataset-with-a-noisy-sinus","page":"Online Gaussian Process","title":"We create a toy dataset with a noisy sinus","text":"","category":"section"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"N = 2000\nσ = 0.1\nX, y = noisy_sin(N, 0, 20; noise=σ)\nX_train = X[1:2:end];\ny_train = y[1:2:end]; # We split the data equally\nX_test = X[2:2:end];\ny_test = y[2:2:end];\nscatter(X_train, y_train)","category":"page"},{"location":"examples/onlinegp/#Plot-model-at-each-step","page":"Online Gaussian Process","title":"Plot model at each step","text":"","category":"section"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"function plot_model(model, X, X_test, X_train, y_train)\n    y_pred, sig_pred = proba_y(model, X_test)\n    plot(X, sin; lab=\"f\", color=:black, lw=3.0, ylims=(-2, 2))\n    plot!(X_test, y_pred; ribbon=sqrt.(sig_pred), lab=\"Prediction\", lw=3.0)\n    scatter!(X_train, y_train; msw=0.0, alpha=0.5, lab=\"Data\")\n    return scatter!(first.(model[1].Z), mean(model[1]); lab=\"IP\")\nend","category":"page"},{"location":"examples/onlinegp/#Model-training","page":"Online Gaussian Process","title":"Model training","text":"","category":"section"},{"location":"examples/onlinegp/#Create-a-kernel","page":"Online Gaussian Process","title":"Create a kernel","text":"","category":"section"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"k = SqExponentialKernel();\nnothing #hide","category":"page"},{"location":"examples/onlinegp/#Create-an-inducing-point-selection-method","page":"Online Gaussian Process","title":"Create an inducing point selection method","text":"","category":"section"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"IP_alg = OIPS(0.8);\nnothing #hide","category":"page"},{"location":"examples/onlinegp/#Create-the-model-and-stream-the-data","page":"Online Gaussian Process","title":"Create the model and stream the data","text":"","category":"section"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"model = OnlineSVGP(k, GaussianLikelihood(σ), AnalyticVI(), IP_alg; optimiser=false)\nanim = Animation()\nsize_batch = 100\nlet state = nothing\n    for (i, (X_batch, y_batch)) in\n        enumerate(eachbatch((X_train, y_train); obsdim=1, size=size_batch))\n        _, state = train!(model, X_batch, y_batch, state; iterations=5)\n        plot_model(\n            model, X, X_test, X_train[1:(i * size_batch)], y_train[1:(i * size_batch)]\n        )\n        frame(anim)\n    end\nend\ngif(anim; fps=4)","category":"page"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"This works just as well with any likelihood! Just try it out!","category":"page"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"","category":"page"},{"location":"examples/onlinegp/","page":"Online Gaussian Process","title":"Online Gaussian Process","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/#Examples","page":"Examples","title":"Examples","text":"","category":"section"},{"location":"examples/","page":"Examples","title":"Examples","text":"The best way to understand how the package is working is to look at examples. For each model you can find a Jupyter notebook on this repository.","category":"page"},{"location":"userguide/#User-Guide","page":"User Guide","title":"User Guide","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"There are 3 main actions needed to train and use the different models:","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Initialization\nTraining\nPrediction","category":"page"},{"location":"userguide/#init","page":"User Guide","title":"Initialization","text":"","category":"section"},{"location":"userguide/#Possible-models","page":"User Guide","title":"Possible models","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"There are currently 8 possible Gaussian Process models:","category":"page"},{"location":"userguide/#[GP](@ref)","page":"User Guide","title":"GP","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"GP corresponds to the original GP regression model, it is necessarily with a Gaussian likelihood.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"    GP(X_train, y_train, kernel; kwargs...)","category":"page"},{"location":"userguide/#[VGP](@ref)","page":"User Guide","title":"VGP","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"VGP is a variational GP model: a multivariate Gaussian is approximating the true posterior. There is no inducing points augmentation involved. Therefore it is well suited for small datasets (~10^3 samples).","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"    VGP(X_train, y_train, kernel, likelihood, inference; kwargs...)","category":"page"},{"location":"userguide/#[SVGP](@ref)","page":"User Guide","title":"SVGP","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"SVGP is a variational GP model augmented with inducing points. The optimization is done on those points, allowing for stochastic updates and large scalability. The counterpart can be a slightly lower accuracy and the need to select the number and the location of the inducing points (however this is a problem currently worked on).","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"    SVGP(kernel, likelihood, inference, Z; kwargs...)","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Where Z is the position of the inducing points.","category":"page"},{"location":"userguide/#[MCGP](@ref)","page":"User Guide","title":"MCGP","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"MCGP is a GP model where the posterior is represented via a collection of samples.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"   MCGP(X_train, y_train, kernel, likelihood, inference; kwargs...)","category":"page"},{"location":"userguide/#[OnlineSVGP](@ref)","page":"User Guide","title":"OnlineSVGP","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"OnlineSVGP is an online variational GP model. It is based on the streaming method of Bui 17', it supports all likelihoods, even with multiple latent GPs.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"    OnlineSVGP(kernel, likelihood, inference, ind_point_algorithm; kwargs...)","category":"page"},{"location":"userguide/#[MOVGP](@ref)","page":"User Guide","title":"MOVGP","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"MOVGP is a multi output variational GP model based on the principle f_output[i] = sum(A[i, j] * f_latent[j] for j in 1:n_latent). The number of latent GP is free.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"    MOVGP(X_train, ys_train, kernel, likelihood/s, inference, n_latent; kwargs...)","category":"page"},{"location":"userguide/#[MOSVGP](@ref)","page":"User Guide","title":"MOSVGP","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"MOSVGP is the same thing as MOVGP but with inducing points: a multi output sparse variational GP model, based on Moreno-Muñoz 18'.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"    MOVGP(kernel, likelihood/s, inference, n_latent, n_inducing_points; kwargs...)","category":"page"},{"location":"userguide/#[VStP](@ref)","page":"User Guide","title":"VStP","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"VStP is a variational Student-T model where the prior is a multivariate Student-T distribution with scale K, mean μ₀ and degrees of freedom ν. The inference is done automatically by augmenting the prior as a scale mixture of inverse gamma.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"    VStP(X_train, y_train, kernel, likelihood, inference, ν; kwargs...)","category":"page"},{"location":"userguide/#likelihood_user","page":"User Guide","title":"Likelihood","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"GP can only have a Gaussian likelihood, while the other have more choices. Here are the ones currently implemented:","category":"page"},{"location":"userguide/#Regression","page":"User Guide","title":"Regression","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"For regression, four likelihoods are available :","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"The classical GaussianLikelihood, for Gaussian noise.\nThe StudentTLikelihood, assuming noise from a Student-T distribution (more robust to ouliers).\nThe LaplaceLikelihood, with noise from a Laplace distribution.\nThe HeteroscedasticLikelihood, (in development) where the noise is a function of the input: Var(X) = λσ^-1(g(X)) where g(X) is an additional Gaussian Process and σ is the logistic function.","category":"page"},{"location":"userguide/#Classification","page":"User Guide","title":"Classification","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"For classification one can select among","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"The LogisticLikelihood : a Bernoulli likelihood with a logistic link.\nThe BayesianSVM likelihood based on the frequentist SVM, equivalent to use a hinge loss.","category":"page"},{"location":"userguide/#Event-Likelihoods","page":"User Guide","title":"Event Likelihoods","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"For likelihoods such as Poisson or Negative Binomial, we approximate a parameter by σ(f). Two Likelihoods are implemented :","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"The PoissonLikelihood : A discrete Poisson process (one parameter per point) with the scale parameter defined as λσ(f).\nThe NegBinomialLikelihood : The Negative Binomial likelihood where r is fixed and we define the success probability p as σ(f).","category":"page"},{"location":"userguide/#Multi-class-classification","page":"User Guide","title":"Multi-class classification","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"There is two available likelihoods for multi-class classification:","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"The SoftMaxLikelihood, the most common approach. However no analytical solving is possible.\nThe LogisticSoftMaxLikelihood, a modified softmax where the exponential function is replaced by the logistic function. It allows to get a fully conjugate model, Corresponding paper.","category":"page"},{"location":"userguide/#More-options","page":"User Guide","title":"More options","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"There is the project to get distributions from Distributions.jl to work directly as likelihoods.","category":"page"},{"location":"userguide/#Inference","page":"User Guide","title":"Inference","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Inference can be done in various ways.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"AnalyticVI : Variational Inference with closed-form updates. For non-Gaussian likelihoods, this relies on augmented version of the likelihoods. For using Stochastic Variational Inference, one can use AnalyticSVI with the size of the mini-batch as an argument.\nGibbsSampling : Gibbs Sampling of the true posterior, this also rely on an augmented version of the likelihoods, this is only valid for the VGP model at the moment.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"The two next methods rely on numerical approximation of an integral and I therefore recommend using the classical Descent approach as it will use anyway the natural gradient updates. ADAM seem to give random results.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"QuadratureVI : Variational Inference with gradients computed by estimating the expected log-likelihood via quadrature.\nMCIntegrationVI : Variational Inference with gradients computed by estimating the expected log-likelihood via Monte Carlo Integration.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"[WIP] : AdvancedHMC.jl will be integrated at some point, although generally the Gibbs sampling is preferable when available.","category":"page"},{"location":"userguide/#compat_table","page":"User Guide","title":"Compatibility table","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Not all inference are implemented/valid for all likelihoods, here is the compatibility table between them.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Likelihood/Inference AnalyticVI GibbsSampling QuadratureVI MCIntegrationVI\nGaussianLikelihood ✔ (Analytic) ✖ ✖ ✖\nStudentTLikelihood ✔ ✔ ✔ ✖\nLaplaceLikelihood ✔ ✔ ✔ ✖\nHeteroscedasticLikelihood ✔ ✔ (dev) ✖\nLogisticLikelihood ✔ ✔ ✔ ✖\nBayesianSVM ✔ (dev) ✖ ✖\nLogisticSoftMaxLikelihood ✔ ✔ ✖ (dev)\nSoftMaxLikelihood ✖ ✖ ✖ ✔\nPoisson ✔ ✔ ✖ ✖\nNegBinomialLikelihood ✔ ✔ ✖ ✖\n    ","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"(dev) means that the feature is possible and may be developped and tested but is not available yet. All contributions or requests are very welcome!","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Model/Inference AnalyticVI GibbsSampling QuadratureVI MCIntegrationVI\nVGP ✔ ✖ ✔ ✔\nSVGP ✔ ✖ ✔ ✔\nMCGP ✖ ✔ ✖ ✖\nOnlineSVGP ✔ ✖ ✖ ✖\nMO(S)VGP ✔ ✖ ✔ ✔\nVStP ✔ ✖ ✔ ✔","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Note that for MO(S)VGP you can use a mix of different likelihoods.","category":"page"},{"location":"userguide/#Inducing-Points","page":"User Guide","title":"Inducing Points","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Both SVGP and MOSVGP do not take data directly as inputs but inducing points instead. AGP.jl directly reexports the InducingPoints.jl package for you to use. For example to use a k-means approach to select 100 points on your input data you can use:","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"    Z = inducingpoints(KmeanAlg(100), X)","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Z will always be an AbstractVector and be directly compatible with SVGP and MOSVGP","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"For OnlineSVGP, since it cannot be assumed that you have data from the start, only an online inducing points selection algorithm can be used. The inducing points locations will be initialized with the first batch of data","category":"page"},{"location":"userguide/#Additional-Parameters","page":"User Guide","title":"Additional Parameters","text":"","category":"section"},{"location":"userguide/#Hyperparameter-optimization","page":"User Guide","title":"Hyperparameter optimization","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"One can optimize the kernel hyperparameters as well as the inducing points location by maximizing the ELBO. All derivations are already hand-coded (no AD needed). One can select the optimization scheme via :","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"The optimiser keyword, can be nothing or false for no optimization or can be an optimiser from the Flux.jl library, see list here Optimisers.\nThe Zoptimiser keyword, similar to optimiser it is used for optimizing the inducing points locations, it is by default set to nothing (no optimization).","category":"page"},{"location":"userguide/#meanprior","page":"User Guide","title":"PriorMean","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"The mean keyword allows you to add different types of prior means:","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"ZeroMean, a constant mean that cannot be optimized.\nConstantMean, a constant mean that can be optimized.\nEmpiricalMean, a vector mean with a different value for each point.\nAffineMean, μ₀ is given by X*w + b.","category":"page"},{"location":"userguide/#train","page":"User Guide","title":"Training","text":"","category":"section"},{"location":"userguide/#Offline-models","page":"User Guide","title":"Offline models","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Training is straightforward after initializing the model by running :","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"model, state = train!(model, X_train, y_train; iterations=100, callback=callbackfunction)","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"where the callback option is for running a function at every iteration. callbackfunction should be defined as","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"function callbackfunction(model, iter)\n    # do things here...\nend","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"The returned state will contain different variables such as some kernel matrices and local variables. You can reuse this state to save some computations when using prediction functions or computing the ELBO.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Note that passing X_train and y_train is optional for GP, VGP and MCGP","category":"page"},{"location":"userguide/#Online-models","page":"User Guide","title":"Online models","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"We recommend looking at the tutorial on online Gaussian processes. One needs to pass a state around, i.e.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"    let state=nothing\n        for (X_batch, y_batch) in eachbatch((X_train, y_train))\n            model, state = train!(model, X_batch, y_batch, state; iterations=10)\n        end\n    end","category":"page"},{"location":"userguide/#pred","page":"User Guide","title":"Prediction","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Once the model has been trained it is finally possible to compute predictions. There always three possibilities :","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"predict_f(model, X_test; covf=true, fullcov=false) : Compute the parameters (mean and covariance) of the latent normal distributions of each test points. If covf=false return only the mean, if fullcov=true return a covariance matrix instead of only the diagonal.\npredict_y(model, X_test) : Compute the point estimate of the predictive likelihood for regression or the label of the most likely class for classification.\nproba_y(model, X_test) : Return the mean with the variance of each point for regression or the predictive likelihood to obtain the class y=1 for classification.","category":"page"},{"location":"userguide/#Miscellaneous","page":"User Guide","title":"Miscellaneous","text":"","category":"section"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"🚧 In construction – Should be developed in the near future 🚧","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Saving/Loading models","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"Once a model has been trained it is possible to save its state in a file by using  save_trained_model(filename,model), a partial version of the file will be save in filename.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"It is then possible to reload this file by using load_trained_model(filename). !!!However note that it will not be possible to train the model further!!! This function is only meant to do further predictions.","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"🚧 Pre-made callback functions 🚧","category":"page"},{"location":"userguide/","page":"User Guide","title":"User Guide","text":"There is one (for now) premade function to return a a MVHistory object and callback function for the training of binary classification problems. The callback will store the ELBO and the variational parameters at every iterations included in iterpoints If `Xtestandy_test` are provided it will also store the test accuracy and the mean and median test loglikelihood","category":"page"},{"location":"background/#The-bits-of-math-and-science-behind-it","page":"Background","title":"The bits of math and science behind it","text":"","category":"section"},{"location":"background/","page":"Background","title":"Background","text":"You can find the behind the scene augmentation theory in this paper : Automated Augmented Conjugate Inference for Non-conjugate Gaussian Process Models.","category":"page"},{"location":"background/#Gaussian-Processes","page":"Background","title":"Gaussian Processes","text":"","category":"section"},{"location":"background/","page":"Background","title":"Background","text":"To quote Wikipedia \"A Gaussian process is a stochastic process (a collection of random variables indexed by time or space), such that every finite collection of those random variables has a multivariate normal distribution, i.e. every finite linear combination of them is normally distributed. The distribution of a Gaussian process is the joint distribution of all those (infinitely many) random variables, and as such, it is a distribution over functions with a continuous domain, e.g. time or space.\"","category":"page"},{"location":"background/","page":"Background","title":"Background","text":"For a detailed understanding of Gaussian processes, check the wonderful book of Rasmussen and Williams and for a quick introduction, check this tutorial by Zoubin Ghahramani.","category":"page"},{"location":"background/","page":"Background","title":"Background","text":"Gaussian Processes are extremely practical models since they are non-parametric and are Bayesian. However the basic model is limited to regression with Gaussian noise and does not scale very well to large datasets (>1000 samples). The Augmented Gaussian Processes solve both these problems by adding inducing points as well as transforming the likelihood to get efficient variational inference.","category":"page"},{"location":"background/#Augmented-Gaussian-Processes","page":"Background","title":"Augmented Gaussian Processes","text":"","category":"section"},{"location":"background/","page":"Background","title":"Background","text":"We are interested in models which consist of a GP prior on a latent function fsim textGP(0k), where k is the kernel function and the data y is connected to f via a non-conjugate likelihood p(yf) . We now aim on finding an augmented representation of the model which renders the model conditionally conjugate. Let omega be potential augmentation, then the augmented joint distribution is","category":"page"},{"location":"background/","page":"Background","title":"Background","text":"p(yfomega) =p(yfomega)p(omega)p(f)","category":"page"},{"location":"background/","page":"Background","title":"Background","text":"The original model can be restored by marginalizing omega, i.e. p(yf) =int p(yfomega)domega.","category":"page"},{"location":"background/","page":"Background","title":"Background","text":"The  goal  is  to  find  an  augmentation omega,  such  that  the  augmented  likelihood p(yfomega) becomes conjugate to the prior distributions p(f) and p(omega) and the expectations of the log complete conditional distributions log p(fomegay) and log p(omegafy) can be computed in closed-form.","category":"page"},{"location":"background/#How-to-find-a-suitable-augmentation?","page":"Background","title":"How to find a suitable augmentation?","text":"","category":"section"},{"location":"background/","page":"Background","title":"Background","text":"Many popular likelihood functions can be expressed as a scale mixture of Gaussians","category":"page"},{"location":"background/","page":"Background","title":"Background","text":"p(yf) =int N(yBftextdiag(omega^1))p(omega)domega","category":"page"},{"location":"background/","page":"Background","title":"Background","text":"where B is a matrix (Palmer et al., 2006).  This representation directly leads to the augmented likelihood p(yomegaf) =N(yBftextdiag(omega^1)) which is conjugate in f, i.e. the posterior is again a Gaussian. I am currently working on a generalized  and automatic approach, which should be available during this year.","category":"page"},{"location":"background/#Inference-in-the-augmented-model","page":"Background","title":"Inference in the augmented model","text":"","category":"section"},{"location":"background/","page":"Background","title":"Background","text":"If we assume that the augmentation, discussed in the previous section, was successful and that we obtained an augmented model p(yfomega) = p(yfomega)p(f)p(omega) which is conditionally conjugate. In a conditionally conjugate model variational inference is easy and block coordinate ascent updates can be computed in closed-form. We follow as structured mean-field approach and assume a decoupling between the latent GP f and the auxiliary variable omega in the variational distribution q(fomega) = q(f) q(omega).  We alternate between updating q(f) and q(omega) by using the typical coordinate ascent (CAVI) updates building on expectations of the log complete conditionals.","category":"page"},{"location":"background/","page":"Background","title":"Background","text":"The hyperparameter of the latent GP (e.g. length scale) are learned by optimizing the variational lower bound as function of the hyper parameters. We alternate between updating the variational parameters and the hyperparameters.","category":"page"},{"location":"background/#Sparse-Gaussian-Processes","page":"Background","title":"Sparse Gaussian Processes","text":"","category":"section"},{"location":"background/","page":"Background","title":"Background","text":"Direct inference for GPs has a cubic computational complexity mathcalO(N^3). To scale our model to big datasets we approximate the latent GP by a sparse GP building on inducing points. This reduces the complexity to mathcalO(M^3), where M is the number of inducing points. Using inducing points allows us to employ stochastic variational inference (SVI) that computes the updates based on mini-batches of the data.","category":"page"},{"location":"","page":"Home","title":"Home","text":"(Image: AugmentedGaussianProcesses.jl)","category":"page"},{"location":"","page":"Home","title":"Home","text":"(Image: Docs Latest) (Image: Docs Stable) (Image: BuildStatus) (Image: Coverage Status)","category":"page"},{"location":"","page":"Home","title":"Home","text":"A Julia package for Augmented and Normal Gaussian Processes.","category":"page"},{"location":"","page":"Home","title":"Home","text":"","category":"page"},{"location":"#Author","page":"Home","title":"Author","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Théo Galy-Fajou PhD Student at Technical University of Berlin.","category":"page"},{"location":"#Installation","page":"Home","title":"Installation","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"AugmentedGaussianProcesses is a registered package and is symply installed by running","category":"page"},{"location":"","page":"Home","title":"Home","text":"pkg> add AugmentedGaussianProcesses","category":"page"},{"location":"#Basic-example","page":"Home","title":"Basic example","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Here is a simple example to start right away :","category":"page"},{"location":"","page":"Home","title":"Home","text":"using AugmentedGaussianProcesses\nmodel = SVGP(compose(SqExponentialKernel(), ScaleTransform(1.0)), LogisticLikelihood(), AnalyticVI(), inducingpoints(KmeansAlg(50), X_train))\ntrain!(model, X_train, y_train; iterations=100)\ny_pred = predict_y(model, X_test)","category":"page"},{"location":"#Related-Gaussian-Processes-packages","page":"Home","title":"Related Gaussian Processes packages","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"GaussianProcesses.jl : General package for Gaussian Processes with many available likelihoods.\nStheno.jl : Package for Gaussian Process regression.\nAbstractGPs.jl : General package containing base functions for working with GPs.\nGPLikelihoods.jl : Package to define likelihoods for latent GP models.\nApproximateGPs.jl : Package for variational GPs based on AbstractGPs.jl.","category":"page"},{"location":"","page":"Home","title":"Home","text":"A general comparison between this package is done on Julia GP Package Comparison.","category":"page"},{"location":"#License","page":"Home","title":"License","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"AugmentedGaussianProcesses.jl is licensed under the MIT \"Expat\" license; see LICENSE for the full license text.","category":"page"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"EditURL = \"https://github.com/theogf/AugmentedGaussianProcesses.jl/blob/master/docs/examples/multiclassgp.jl\"","category":"page"},{"location":"examples/multiclassgp/#Gaussian-Process-Multi-Class-Classification","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"","category":"section"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"(Image: )","category":"page"},{"location":"examples/multiclassgp/#Preliminary-steps","page":"Gaussian Process Multi-Class Classification","title":"Preliminary steps","text":"","category":"section"},{"location":"examples/multiclassgp/#Loading-necessary-packages","page":"Gaussian Process Multi-Class Classification","title":"Loading necessary packages","text":"","category":"section"},{"location":"examples/multiclassgp/#Data-generation-and-setting-up-packages","page":"Gaussian Process Multi-Class Classification","title":"Data generation and setting up packages","text":"","category":"section"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"using Plots\nusing Distributions\nusing AugmentedGaussianProcesses","category":"page"},{"location":"examples/multiclassgp/#Generate-data-from-a-mixture-of-gaussians-(you-can-control-the-noise)","page":"Gaussian Process Multi-Class Classification","title":"Generate data from a mixture of gaussians (you can control the noise)","text":"","category":"section"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"n_data = 300\nn_dim = 2\nn_grid = 100\nminx = -2.5;\nmaxx = 3.5;\nnothing #hide","category":"page"},{"location":"examples/multiclassgp/#We-try-different-noises-(different-overlaps)","page":"Gaussian Process Multi-Class Classification","title":"We try different noises (different overlaps)","text":"","category":"section"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"σs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.8]\nn_class = n_dim + 1;\nnothing #hide","category":"page"},{"location":"examples/multiclassgp/#We-create-a-function-generating-a-mixture-of-Gaussians","page":"Gaussian Process Multi-Class Classification","title":"We create a function generating a mixture of Gaussians","text":"","category":"section"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"function generate_mixture_data(σ)\n    centers = zeros(n_class, n_dim)\n    # Create equidistant centers\n    for i in 1:n_dim\n        centers[i, i] = 1.0\n    end\n    centers[end, :] .= (1 + sqrt(n_class)) / n_dim\n    centers ./= sqrt(n_dim)\n    # Generate distributions with desired noise\n    distr = [MvNormal(centers[i, :], σ) for i in 1:n_class]\n    X = zeros(Float64, n_data, n_dim)\n    y = zeros(Int64, n_data)\n    for i in eachindex(y)\n        y[i] = rand(1:n_class)\n        X[i, :] = rand(distr[y[i]])\n    end\n    return X, y\nend","category":"page"},{"location":"examples/multiclassgp/#And-a-function-to-plot-the-data","page":"Gaussian Process Multi-Class Classification","title":"And a function to plot the data","text":"","category":"section"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"function plot_data(X, Y, σ)\n    p = Plots.plot(size(300, 500); lab=\"\", title=\"sigma = $σ\")\n    Plots.scatter!(eachcol(X)...; group=Y, msw=0.0, lab=\"\")\n    return p\nend\n\nplot([plot_data(generate_mixture_data(σ)..., σ) for σ in σs]...)","category":"page"},{"location":"examples/multiclassgp/#Model-training","page":"Gaussian Process Multi-Class Classification","title":"Model training","text":"","category":"section"},{"location":"examples/multiclassgp/#Run-sparse-multiclass-classification-with-different-level-of-noise","page":"Gaussian Process Multi-Class Classification","title":"Run sparse multiclass classification with different level of noise","text":"","category":"section"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"models = Vector{AbstractGPModel}(undef, length(σs))\nkernel = SqExponentialKernel()\nnum_inducing = 50\nfor (i, σ) in enumerate(σs)\n    @info \"Training with data with noise $σ\"\n    X, y = generate_mixture_data(σ)\n    m = SVGP(\n        kernel,\n        LogisticSoftMaxLikelihood(n_class),\n        AnalyticVI(),\n        inducingpoints(KmeansAlg(num_inducing), X);\n        optimiser=false,\n        Zoptimiser=false,\n    )\n    @time train!(m, X, y, 20)\n    models[i] = m\nend","category":"page"},{"location":"examples/multiclassgp/#Function-to-create-predictions-and-plot-them","page":"Gaussian Process Multi-Class Classification","title":"Function to create predictions and plot them","text":"","category":"section"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"function compute_grid(model, n_grid=50)\n    xlin = range(minx, maxx; length=n_grid)\n    ylin = range(minx, maxx; length=n_grid)\n    x_grid = Iterators.product(xlin, ylin)\n    y_p = proba_y(model, vec(collect.(x_grid)))\n    y = predict_y(model, vec(collect.(x_grid)))\n    return y_p, y, xlin, ylin\nend;\n\nfunction plot_contour(model, σ)\n    n_grid = 100\n    pred_proba, pred, x, y = compute_grid(model, n_grid)\n    colors = reshape(\n        [\n            RGB([pred_proba[model.likelihood.ind_mapping[j]][i] for j in 1:n_class]...) for\n            i in 1:(n_grid^2)\n        ],\n        n_grid,\n        n_grid,\n    ) # Convert the predictions into an RGB array\n    Plots.contour(\n        x,\n        y,\n        colors;\n        cbar=false,\n        fill=false,\n        color=:black,\n        linewidth=2.0,\n        title=\"sigma = $σ\",\n    )\n    return Plots.contour!(\n        x,\n        y,\n        reshape(pred, n_grid, n_grid);\n        clims=(0, 100),\n        colorbar=false,\n        color=:gray,\n        levels=10,\n    )\nend;\nnothing #hide","category":"page"},{"location":"examples/multiclassgp/#Plot-the-final-results","page":"Gaussian Process Multi-Class Classification","title":"Plot the final results","text":"","category":"section"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"Plots.plot(plot_contour.(models, σs)...)","category":"page"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"","category":"page"},{"location":"examples/multiclassgp/","page":"Gaussian Process Multi-Class Classification","title":"Gaussian Process Multi-Class Classification","text":"This page was generated using Literate.jl.","category":"page"},{"location":"kernel/#Kernels-(Covariance-functions)","page":"Kernels","title":"Kernels (Covariance functions)","text":"","category":"section"},{"location":"kernel/","page":"Kernels","title":"Kernels","text":"Kernels are entirely defered to the package KernelFunctions.jl, you can have a look at the documentation to see which are available. Note that, for now, optimization is only possible for ScaleTransform or ARDTransform with ForwardDiff while all others should be compatible with Zygote.","category":"page"},{"location":"kernel/#Hyperparameter-optimization","page":"Kernels","title":"Hyperparameter optimization","text":"","category":"section"},{"location":"kernel/","page":"Kernels","title":"Kernels","text":"The advantage of Gaussian Processes is that it is possible to optimize all the hyperparameters of the model by optimizing the lower bound on the log evidence. One can compute the gradient of it and apply a classical gradient descent algorithm.","category":"page"},{"location":"kernel/","page":"Kernels","title":"Kernels","text":"Unlike most other packages, the derivatives are computed analytically. One needs to compute the matrix derivatives via the kernel derivatives. If K was defined via k(xx) then :","category":"page"},{"location":"kernel/","page":"Kernels","title":"Kernels","text":"fracd Kdtheta  = J_theta","category":"page"},{"location":"kernel/","page":"Kernels","title":"Kernels","text":"Where J_theta was defined via fracdk(xx)dtheta. This part is done by automatic differentiation. To chose between Zygote or ForwardDiff use AGP.setKadbackend(:reverse_diff) or AGP.setKadbackend(:forward_diff) respectively.","category":"page"},{"location":"kernel/","page":"Kernels","title":"Kernels","text":"The rest of the work is simply matrix algebra.","category":"page"}]
}