Commit

Merge 004915e into 2b90bc3

theogf committed Jul 19, 2022
2 parents 2b90bc3 + 004915e commit 8e120b5

Showing 5 changed files with 150 additions and 108 deletions.
40 changes: 24 additions & 16 deletions docs/examples/gpclassification.jl
@@ -17,39 +17,47 @@ data.Class[data.Class .== 2] .= -1
data = Matrix(data)
X = data[:, 1:2]
Y = Int.(data[:, end]);
(X_train, y_train), (X_test, y_test) = splitobs((X, Y), 0.5, ObsDim.First())
# ### We create a function to visualize the data
(X_train, y_train), (X_test, y_test) = splitobs((X, Y), 0.5, ObsDim.First()) # We split the data into train and test set

# ### We create a function to visualize the data
function plot_data(X, Y; size=(300, 500))
return Plots.scatter(
eachcol(X)...; group=Y, alpha=0.2, markerstrokewidth=0.0, lab="", size=size
eachcol(X)...; xlabel="x", ylabel="y", group=Y, alpha=0.2, markerstrokewidth=0.0, lab="", size=size
)
end
plot_data(X, Y; size=(500, 500))
plot_data(X, Y; size=(500, 800))

# ### Run sparse classification with increasing number of inducing points
Ms = [4, 8, 16, 32, 64]
# ## Model initialization and training
# Binary classification with Gaussian processes is usually formulated as
# ```math
# y \sim \mathrm{Bernoulli}(h(f))
# ```
# where ``h`` is the inverse link.
# Multiple choices exist for ``h`` but we will focus mostly on ``h(x)=\sigma(x)=(1+\exp(-x))^{-1}``, i.e. the logistic function.
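# As a quick illustration (a minimal sketch, not part of the original example, using a
# hypothetical helper name), the logistic inverse link maps latent values to probabilities in ``(0, 1)``:
logistic_link(f) = 1 / (1 + exp(-f)) # logistic inverse link
logistic_link.([-5.0, 0.0, 5.0]) # ≈ [0.0067, 0.5, 0.9933]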
# ### Run sparse classification with an increasing number of inducing points
Ms = [4, 8, 16, 32, 64] # Number of inducing points
models = Vector{AbstractGPModel}(undef, length(Ms) + 1)
kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)
kernel = with_lengthscale(SqExponentialKernel(), 1.0) # We create a standard kernel with lengthscale 1
for (i, num_inducing) in enumerate(Ms)
@info "Training with $(num_inducing) points"
m = SVGP(
kernel,
LogisticLikelihood(),
AnalyticVI(),
inducingpoints(KmeansAlg(num_inducing), X);
optimiser=false,
Zoptimiser=false,
inducingpoints(KmeansAlg(num_inducing), X); # Z is selected via the kmeans algorithm
optimiser=false, # We keep the kernel parameters fixed
Zoptimiser=false, # We keep the inducing points locations fixed
)
@time train!(m, X_train, y_train, 20)
models[i] = m
@time train!(m, X_train, y_train, 5) # We train the model on the training data for 5 iterations
models[i] = m # And store the model
end
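# As a rough guide, the sparse SVGP model above works with ``M`` inducing points and costs
# on the order of ``O(NM^2 + M^3)`` per iteration, whereas the full VGP model below scales
# as ``O(N^3)``, which is why the sparse variant is preferred for larger datasets.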
# ### Running the full model
# ### We initialize and train the non-sparse model
@info "Running full model"
mfull = VGP(X_train, y_train, kernel, LogisticLikelihood(), AnalyticVI(); optimiser=false)
@time train!(mfull, 5)
models[end] = mfull

# ## Prediction visualization
# ### We create a prediction and plot function on a grid
function compute_grid(model, n_grid=50)
mins = [-3.25, -2.85]
@@ -95,9 +103,9 @@ Plots.plot(
plot_model.(models, Ref(X), Ref(Y))...; layout=(1, length(models)), size=(1000, 200)
)
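# `Ref(X)` and `Ref(Y)` wrap the data so that the broadcast runs over `models` only;
# each `plot_model` call receives the full `X` and `Y` rather than their elements.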

# ## Bayesian SVM vs Logistic
# ### We now create a model with the Bayesian SVM likelihood

# ## Bayesian SVM vs Logistic link
# ### We now create another full model but with the Bayesian SVM link
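# The Bayesian SVM uses a hinge-loss-based pseudo-likelihood, roughly of the form
# ```math
# p(y \mid f) \propto \exp(-2\max(0, 1 - y f)),
# ```
# so the resulting classifier behaves like a probabilistic version of a classical SVM.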
@info "Running model with Bayesian SVM Likelihood"
mbsvm = VGP(X_train, y_train, kernel, BayesianSVM(), AnalyticVI(); optimiser=false)
@time train!(mbsvm, 5)
# ### And compare it with the Logistic likelihood
111 changes: 67 additions & 44 deletions docs/examples/gpevents.jl
@@ -3,67 +3,90 @@
# ## Preliminary steps
#
# ### Loading necessary packages
using Plots
using AugmentedGaussianProcesses
using Distributions
using Plots
default(msw=0.0, alpha=0.5, lw=3.0)

# ## Creating some random data
# ### Creating some random data
# We first create some random data for the negative binomial problem
# using the generative model
n_data = 200
X = (rand(n_data) .- 0.5) * 40
x = (rand(n_data) .- 0.5) * 20
r = 5.0
Y = rand.(NegativeBinomial.(r, AGP.logistic.(sin.(X))))
scatter(X, Y)
y_bin = rand.(NegativeBinomial.(r, AGP.logistic.(-sin.(x))))
# We use `-sin` because Distributions.jl and Wikipedia use different conventions for the negative binomial parametrization.
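# As a quick check of the convention used here (a small sketch relying on `mean` from
# Distributions.jl): with this parametrization a larger probability parameter yields
# *fewer* counts on average.
mean(NegativeBinomial(r, 0.9)), mean(NegativeBinomial(r, 0.1)) # ≈ (0.56, 45.0)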
# We do the same for the Poisson problem
λ = 10.0
y_poisson = rand.(Poisson.(λ * AGP.logistic.(sin.(x))))
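# In other words, the generative model for the Poisson data is
# ```math
# y \sim \mathrm{Poisson}(\lambda\,\sigma(f(x))), \quad f(x) = \sin(x),
# ```
# so the rate is a logistic transform of the latent function, bounded above by ``\lambda``.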
# And visualize the resulting data
plot(
scatter(x, y_bin; lab="", title="Negative Binomial"),
scatter(x, y_poisson; lab="", title="Poisson");
xlabel="x",
ylabel="y"
)

# ## Run GP model with negative binomial likelihood to learn p
kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)
# ## Initialization and training of the models
# ### Run GP model with negative binomial likelihood to learn p
kernel = with_lengthscale(SqExponentialKernel(), 1.0)
m_negbinomial = VGP(
X, Y, kernel, NegBinomialLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
x, y_bin, kernel, NegBinomialLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
)
@time train!(m_negbinomial, 20)
@time train!(m_negbinomial, 5); # Train the model for 5 iterations

# ## Running the same model but with a Poisson likelihood
kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)
# ### Running the same model but with the Poisson likelihood
kernel = with_lengthscale(SqExponentialKernel(), 1.0)
m_poisson = VGP(
X, Y, kernel, PoissonLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
x, y_poisson, kernel, PoissonLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
)
@time train!(m_poisson, 20)
@time train!(m_poisson, 5);

# Prediction and plot function on a grid
# Create a grid and compute prediction on it
function compute_grid(model, n_grid=50)
mins = -20
maxs = 20
# ## Prediction and plot function on a grid
# Create a grid and compute the predictive mean of `y` on it
function compute_pred_y_grid(model, n_grid=50)
mins = -12
maxs = 12
x_grid = range(mins, maxs; length=n_grid) # Create a grid
y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1)) # Predict the mean and variance on the grid
y_grid, sig_y_grid = proba_y(model, x_grid) # Predict the mean and variance on the grid
return y_grid, sig_y_grid, x_grid
end

# Create a grid and compute the predictive mean of `f` on it
function compute_pred_f_grid(model, n_grid=50)
mins = -12
maxs = 12
x_grid = range(mins, maxs; length=n_grid) # Create a grid
f_grid, f_var_grid = predict_f(model, x_grid; cov=true) # Predict the mean and variance on the grid
return f_grid, f_var_grid, x_grid
end
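# Note the difference between the two helpers above: `predict_f` returns the posterior
# mean and (co)variance of the latent GP ``f``, while `proba_y` pushes the posterior
# through the likelihood and returns the predictive mean and variance of ``y`` itself.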
# Plot the data as a scatter plot
function plot_data(X, Y)
return Plots.scatter(X, Y; alpha=0.33, msw=0.0, lab="", size=(800, 500))
function plot_data(x, y)
return Plots.scatter(x, y; lab="", size=(800, 500))
end

function plot_f(model)
f, f_var, x_grid = compute_pred_f_grid(model, 100)
plot(x_grid, sin; lab="sin", title="Latent f")
plot!(x_grid, f; ribbon=2 * sqrt.(f_var), lab="f")
end

function plot_model(model, X, Y, title=nothing)
n_grid = 100
y_grid, sig_y_grid, x_grid = compute_grid(model, n_grid)
p = plot_data(X, Y)
Plots.plot!(
p,
x_grid,
y_grid;
ribbon=2 * sqrt.(sig_y_grid), # Plot 2 std deviations
title=title,
color="red",
lab="",
linewidth=3.0,
)
return p
end;
function plot_y(model, x, y_bin)
pred_y, pred_y_var, x_grid = compute_pred_y_grid(model, 100)
true_p_y = model.likelihood.(sin.(x_grid))
true_y, true_var_y = mean.(true_p_y), var.(true_p_y)
plot_data(x, y_bin)
plot!(x_grid, true_y; ribbon=2 * sqrt.(true_var_y), lab="true p(y|f)", title="Expectation of y")
plot!(x_grid, pred_y; ribbon=2 * sqrt.(pred_y_var), label="pred p(y|f)")
end

# ## Comparison between the two likelihoods
Plots.plot(
plot_model.(
[m_negbinomial, m_poisson], Ref(X), Ref(Y), ["Neg. Binomial", "Poisson"]
)...;
layout=(1, 2),
# ## Plotting the comparison results
## We first compare the results for the Negative Binomial likelihood
plot(
plot_f(m_negbinomial),
plot_y(m_negbinomial, x, y_bin)
)
## And we do the same for the Poisson likelihood
plot(
plot_f(m_poisson),
plot_y(m_poisson, x, y_poisson)
)
51 changes: 26 additions & 25 deletions docs/examples/gpregression.jl
@@ -5,18 +5,18 @@ using AugmentedGaussianProcesses
using Distributions
using Plots

# We create a toy dataset with X ∈ [-20, 20] and y = 5 * sinc(X)
# We create a toy dataset with `x ∈ [-10, 10]` and `y = 5 * sinc(x)`
N = 1000
X = reshape((sort(rand(N)) .- 0.5) * 40.0, N, 1)
σ = 0.01
x = (sort(rand(N)) .- 0.5) * 20.0
σ = 0.01 # Standard deviation of the Gaussian noise

function latent(x)
return 5.0 * sinc.(x)
end
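# (Recall that Julia's `sinc(x)` is the normalized `sin(πx) / (πx)`, with `sinc(0) = 1`.)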
Y = vec(latent(X) + σ * randn(N));
y = latent(x) + σ * randn(N);

# Visualization of the data:
scatter(X, Y; lab="")
scatter(x, y; lab="")

# ## Gaussian noise

@@ -28,27 +28,27 @@ Ms = [4, 8, 16, 32, 64];
# Create an empty array of GPs
models = Vector{AbstractGPModel}(undef, length(Ms) + 1);
# Chose a kernel
kernel = SqExponentialKernel();# + PeriodicKernel()
kernel = with_lengthscale(SqExponentialKernel(), 1.0);
# And Run sparse classification with an increasing number of inducing points
for (index, num_inducing) in enumerate(Ms)
@info "Training with $(num_inducing) points"
m = SVGP(
kernel, # Kernel
GaussianLikelihood(σ), # Likelihood used
AnalyticVI(), # Inference used to solve the problem
inducingpoints(KmeansAlg(num_inducing), X); # Inducing points initialized with kmeans
range(-10, 10; length=num_inducing); # Simple range
optimiser=false, # Keep kernel parameters fixed
Zoptimiser=false, # Keep inducing points locations fixed
)
@time train!(m, X, Y, 100) # Train the model for 100 iterations
@time train!(m, x, y, 100) # Train the model for 100 iterations
models[index] = m # Save the model in the array
end
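# Placing the inducing points on an evenly spaced range is a reasonable choice for 1-D
# inputs; for higher-dimensional data an algorithm such as
# `inducingpoints(KmeansAlg(num_inducing), X)` (as in the classification example) is
# typically preferred.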

# Train the model without any inducing points (no approximation)
@info "Training with full model"
mfull = GP(
X,
Y,
x,
y,
kernel;
noise=σ,
opt_noise=false, # Keep the noise value fixed
@@ -59,19 +59,19 @@ models[end] = mfull;

# Create a grid and compute prediction on it
function compute_grid(model, n_grid=50)
mins = -20
maxs = 20
mins = -10
maxs = 10
x_grid = range(mins, maxs; length=n_grid) # Create a grid
y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1)) # Predict the mean and variance on the grid
return y_grid, sig_y_grid, x_grid
end;

# Plot the data as a scatter plot
function plotdata(X, Y)
return Plots.scatter(X, Y; alpha=0.33, msw=0.0, lab="", size=(300, 500))
function plotdata(x, y)
return Plots.scatter(x, y; alpha=0.33, msw=0.0, lab="", size=(300, 500))
end

function plot_model(model, X, Y, title=nothing)
function plot_model(model, x, y, title=nothing)
n_grid = 100
y_grid, sig_y_grid, x_grid = compute_grid(model, n_grid)
title = if isnothing(title)
@@ -80,7 +80,7 @@ function plot_model(model, X, Y, title=nothing)
title
end

p = plotdata(X, Y)
p = plotdata(x, y)
Plots.plot!(
p,
x_grid,
@@ -94,9 +94,9 @@ function plot_model(model, X, Y, title=nothing)
if model isa SVGP # Plot the inducing points as well
Plots.plot!(
p,
vec(model.f[1].Z),
zeros(dim(model.f[1]));
msize=2.0,
model.f[1].Z,
mean(model.f[1]);
msize=5.0,
color="black",
t=:scatter,
lab="",
@@ -106,7 +106,7 @@ end;
end;

Plots.plot(
plot_model.(models, Ref(X), Ref(Y))...; layout=(1, length(models)), size=(1000, 200)
plot_model.(models, Ref(x), Ref(y))...; layout=(2, 3), size=(800, 600)
) # Plot all models and combine the plots

# ## Non-Gaussian Likelihoods
@@ -124,8 +124,8 @@ ngmodels = Vector{AbstractGPModel}(undef, length(likelihoods) + 1)
for (i, l) in enumerate(likelihoods)
@info "Training with the $(l)" # We need to use VGP
m = VGP(
X,
Y, # First arguments are the input and output
x,
y, # First arguments are the input and output
kernel, # Kernel
l, # Likelihood used
AnalyticVI(); # Inference used to solve the problem
@@ -139,8 +139,9 @@ ngmodels[end] = models[end] # Add the Gaussian model
# We can now repeat the prediction from before :
Plots.plot(
plot_model.(
ngmodels, Ref(X), Ref(Y), ["Student-T", "Laplace", "Heteroscedastic", "Gaussian"]
ngmodels, Ref(x), Ref(y), ["Student-T", "Laplace", "Heteroscedastic", "Gaussian"]
)...;
layout=(2, 2),
size=(1000, 200),
) # Plot all models and combine the plots
ylims=(-8, 10),
size=(700, 400),
) # Plot all models and combine the plots
