Merge pull request #159 from jump-dev/jg/examples
Fix and simplify examples
matbesancon committed Dec 2, 2021
2 parents 0ab7e17 + 82ec6be commit ebee185
Showing 2 changed files with 64 additions and 89 deletions.
1 change: 1 addition & 0 deletions docs/Project.toml
@@ -9,3 +9,4 @@ OSQP = "ab2f91bb-94b4-55e3-9ba0-7f65df51de79"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
SCS = "c946c3f1-0d1f-5ce8-9dea-7daa1f7e2d13"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
152 changes: 63 additions & 89 deletions docs/src/examples/sensitivity-analysis-svm.jl
@@ -11,55 +11,61 @@
# ```math
# \begin{split}
# \begin{array} {ll}
-# \mbox{minimize} & \sum_{i=1}^{N} \xi_{i} \\
-# \mbox{s.t.} & \xi_{i} \ge 0 \quad i=1..N \\
-# & y_{i} (w^T X_{i} + b) \ge 1 - \xi[i]\\
+# \mbox{minimize} & \lambda||w||^2 + \sum_{i=1}^{N} \xi_{i} \\
+# \mbox{s.t.} & \xi_{i} \ge 0 \quad \quad i=1..N \\
+# & y_{i} (w^T X_{i} + b) \ge 1 - \xi_{i} \quad i=1..N \\
# \end{array}
# \end{split}
# ```
# where
# - `X`, `y` are the `N` data points
# - `w` is the support vector
# - `b` determines the offset `b/||w||` of the hyperplane with normal `w`
# - `ξ` is the soft-margin loss.
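At the optimum the slacks are tight, that is ξᵢ = max(0, 1 - yᵢ(wᵀXᵢ + b)), so the regularized objective can be evaluated directly for any candidate hyperplane. A minimal sketch of that evaluation (the helper `soft_margin_loss` is ours, not part of the example):

```julia
using LinearAlgebra

# Soft-margin objective for a candidate hyperplane (w, b); at the optimum
# of the program above, ξᵢ = max(0, 1 - yᵢ(wᵀXᵢ + b)).
function soft_margin_loss(w, b, X, y, λ)
    margins = y .* (X * w .+ b)  # yᵢ(wᵀXᵢ + b) for every data point
    return λ * dot(w, w) + sum(max.(0.0, 1 .- margins))
end
```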

-# ## Define and solve the SVM

-# Import the libraries.

+# This tutorial uses the following packages

-using SCS, DiffOpt, LinearAlgebra, JuMP
-import Random, Plots
+using JuMP # The mathematical programming modelling language
+import DiffOpt # JuMP extension for differentiable optimization
+import Ipopt # Optimization solver that handles quadratic programs
+import Plots # Graphing tool
+import LinearAlgebra: dot, norm, normalize!
+import Random

+# ## Define and solve the SVM

# Construct separable, non-trivial data points.

N = 100
D = 2
Random.seed!(62)
-X = vcat(randn(N ÷ 2, D), randn(N ÷ 2, D) .+ [4.0, 1.5]')
-y = append!(ones(N ÷ 2), -ones(N ÷ 2));
+X = vcat(randn(N ÷ 2, D), randn(N ÷ 2, D) .+ [4.5, 2.0]')
+y = append!(ones(N ÷ 2), -ones(N ÷ 2))
+λ = 0.05;


# Let's initialize a special model that can understand sensitivities

-model = Model(() -> diff_optimizer(SCS.Optimizer))
+model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
MOI.set(model, MOI.Silent(), true)

# Add the variables

-@variable(model, l[1:N])
+@variable(model, ξ[1:N])
@variable(model, w[1:D])
@variable(model, b);

# Add the constraints.

@constraint(
    model,
-    1.0 * l in MOI.Nonnegatives(N),
-)
+    [i in 1:N],
+    ξ[i] >= 0
+);
@constraint(
    model,
-    cons,
-    y .* (X * w .+ b) + l .- 1 in MOI.Nonnegatives(N),
+    cons[i in 1:N],
+    y[i] * (dot(X[i, :], w) + b) >= 1 - ξ[i]
);
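The N scalar constraints named `cons` are equivalent to the single vectorized conic constraint used by the previous SCS-based version of this example. A sketch of that form, on a throwaway model built only for illustration (`model2`, `ξ2`, `w2`, and `b2` are ours):

```julia
model2 = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
@variable(model2, ξ2[1:N] >= 0)
@variable(model2, w2[1:D])
@variable(model2, b2)
# One vector-in-cone constraint in place of the N scalar inequalities:
@constraint(model2, y .* (X * w2 .+ b2) + ξ2 .- 1 in MOI.Nonnegatives(N))
```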


@@ -68,19 +74,21 @@ MOI.set(model, MOI.Silent(), true)
@objective(
model,
Min,
-    sum(l),
+    λ * dot(w, w) + sum(ξ),
)

-optimize!(model) # solve
+optimize!(model)
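Before differentiating, it is worth confirming that the solve succeeded; this check is an addition of ours, not part of the example:

```julia
# Ipopt is a local NLP solver, so it reports LOCALLY_SOLVED on success.
@assert termination_status(model) in (MOI.OPTIMAL, MOI.LOCALLY_SOLVED)
```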


# We can visualize the separating hyperplane.

loss = objective_value(model)

wv = value.(w)

bv = value(b)

-svm_x = [0.0, 3.0]
+svm_x = [0.0, 5.0] # arbitrary points
svm_y = (-bv .- wv[1] * svm_x )/wv[2]
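The plotted segment is the zero level set of the classifier: solving `w₁x₁ + w₂x₂ + b = 0` for the second coordinate gives

```math
x_2 = \frac{-b - w_1 x_1}{w_2},
```

which is exactly what `svm_y` computes at the two `svm_x` values.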

p = Plots.scatter(X[:,1], X[:,2], color = [yi > 0 ? :red : :blue for yi in y], label = "")
@@ -94,62 +102,29 @@ Plots.plot!(p, svm_x, svm_y, label = "loss = $(round(loss, digits=2))", width=3)
# - How does a change in the labels of the data points (`y=1` to `y=-1`, and vice versa) affect the position of the hyperplane? This is answered by finding the gradient of `w` and `b` with respect to `y[i]`, the classification label of the i-th data point.
# - How does a change in the coordinates of the data points `X` affect the position of the hyperplane? This is answered by finding the gradient of `w` and `b` with respect to `X[i]`, the 2D coordinates of the i-th data point.

-# Note that finding the optimal SVM can be modelled as a conic optimization problem:
-
-# ```math
-# \begin{align*}
-# & \min_{x \in \mathbb{R}^n} & c^T x \\
-# & \text{s.t.} & A x + s = b \\
-# & & b \in \mathbb{R}^m \\
-# & & s \in \mathcal{K}
-# \end{align*}
-# ```
-
-# where
-# ```math
-# \begin{align*}
-# c &= [l_1 - 1, l_2 - 1, \ldots, l_N - 1, 0, 0, \ldots, 0 \text{ (D+1 times)}] \\\\
-# A &=
-# \begin{bmatrix}
-# -l_1 & 0 & \ldots & 0 & 0 & \ldots & 0 & 0 \\
-# 0 & -l_2 & \ldots & 0 & 0 & \ldots & 0 & 0 \\
-# \vdots & \vdots & \ddots & \vdots & 0 & \ldots & 0 & 0 \\
-# 0 & 0 & \ldots & -l_N & 0 & \ldots & 0 & 0 \\
-# 0 & 0 & \ldots & 0 & -y_1 X_{1,1} & \ldots & -y_1 X_{1,D} & -y_1 \\
-# 0 & 0 & \ldots & 0 & -y_2 X_{2,1} & \ldots & -y_2 X_{2,D} & -y_2 \\
-# \vdots & \vdots & \ddots & \vdots & \vdots & \ddots & \vdots & \vdots \\
-# 0 & 0 & \ldots & 0 & -y_N X_{N,1} & \ldots & -y_N X_{N,D} & -y_N \\
-# \end{bmatrix} \\\\
-# b &= [0, 0, \ldots, 0 \text{ (N times)}, l_1 - 1, l_2 - 1, \ldots, l_N - 1] \\\\
-# \mathcal{K} &= \text{Set of nonnegative cones}
-# \end{align*}
-# ```


# ## Experiment 1: Gradient of hyperplane wrt the data point labels

# Construct perturbations in data point labels `y` without changing the data point coordinates `X`.
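Both experiments below follow the same three-step forward-mode recipe; a comment-level sketch (the step numbering and the symbol θ are ours):

```julia
# 1. For each constraint, set the derivative of its function with respect
#    to the scalar parameter θ being perturbed:
#        MOI.set(model, DiffOpt.ForwardInConstraint(), cons[j], dfⱼ/dθ)
# 2. Propagate the perturbation through the optimality conditions:
#        DiffOpt.forward(model)
# 3. Read off the sensitivity of any primal variable:
#        MOI.get(model, DiffOpt.ForwardOutVariablePrimal(), w[k])
```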


-∇ = Float64[]
+∇ = zeros(N)
dy = zeros(N);

-# begin differentiating
-for Xi in 1:N
-    dy[Xi] = 1.0 # set
-    MOI.set(
-        model,
-        DiffOpt.ForwardInConstraint(),
-        cons,
-        MOI.Utilities.vectorize(dy .* index(b)),
-    )
+# Begin differentiating the model,
+# analogous to varying θ in the expression:
+# ```math
+# (y_{i} + \theta) (w^T X_{i} + b) \ge 1 - \xi_{i}
+# ```
+for i in 1:N
+    dy[i] = 1.0 # set
+    for j in 1:N
+        MOI.set(
+            model,
+            DiffOpt.ForwardInConstraint(),
+            cons[j],
+            dy[j] * (dot(X[j, :], index.(w)) + index(b)),
+        )
+    end
    DiffOpt.forward(model)

dw = MOI.get.(
model,
DiffOpt.ForwardOutVariablePrimal(),
@@ -160,9 +135,8 @@ for Xi in 1:N
DiffOpt.ForwardOutVariablePrimal(),
b,
)
-    push!(∇, norm(dw) + norm(db))
-    dy[Xi] = 0.0 # reset the change made above
+    ∇[i] = norm(dw) + norm(db)
+    dy[i] = 0.0 # reset the change made at the beginning of the loop
end

normalize!(∇);
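The normalized entries of `∇` rank the data points by influence; for example, the single most label-sensitive point can be read off directly (a small usage sketch of ours):

```julia
i_star = argmax(∇)       # index of the most label-sensitive data point
X[i_star, :], y[i_star]  # its coordinates and label
```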
@@ -173,34 +147,35 @@ normalize!(∇);
p2 = Plots.scatter(
X[:,1], X[:,2],
color = [yi > 0 ? :red : :blue for yi in y], label = "",
-    markersize = ∇ * 20,
+    markersize = 20 * max.(∇, 0.1 * maximum(∇)),
)
Plots.yaxis!(p2, (-2, 4.5))
-Plots.plot!(p2, svm_x, svm_y, label = "loss = $(round(loss, digits=2))", width=3)
+Plots.plot!(p2, svm_x, svm_y, label = "", width=3)


# ## Experiment 2: Gradient of hyperplane wrt the data point coordinates

# Similar to the previous experiment, construct perturbations in the data point coordinates `X`.

-∇ = Float64[]
+∇ = zeros(N)
dX = zeros(N, D);

-# begin differentiating
-for Xi in 1:N
-    dX[Xi, :] = ones(D) # set
-    for i in 1:D
+# Begin differentiating the model,
+# analogous to varying θ in the expression:
+# ```math
+# y_{i} (w^T (X_{i} + \theta) + b) \ge 1 - \xi_{i}
+# ```
+for i in 1:N
+    dX[i, :] = ones(D) # set
+    for j in 1:N
        MOI.set(
            model,
            DiffOpt.ForwardInConstraint(),
-            cons,
-            MOI.Utilities.vectorize(dX[:, i] .* index(w[i])),
+            cons[j],
+            y[j] * dot(dX[j, :], index.(w)),
        )
    end

    DiffOpt.forward(model)

dw = MOI.get.(
model,
DiffOpt.ForwardOutVariablePrimal(),
@@ -211,9 +186,8 @@
DiffOpt.ForwardOutVariablePrimal(),
b,
)
-    push!(∇, norm(dw) + norm(db))
-    dX[Xi, :] = zeros(D) # reset the change made at the beginning of the loop
+    ∇[i] = norm(dw) + norm(db)
+    dX[i, :] = zeros(D) # reset the change made at the beginning of the loop
end

normalize!(∇);
@@ -223,7 +197,7 @@ normalize!(∇);
p3 = Plots.scatter(
X[:,1], X[:,2],
color = [yi > 0 ? :red : :blue for yi in y], label = "",
-    markersize = ∇ * 20,
+    markersize = 20 * max.(∇, 0.1 * maximum(∇)),
)
Plots.yaxis!(p3, (-2, 4.5))
-Plots.plot!(p3, svm_x, svm_y, label = "loss = $(round(loss, digits=2))", width=3)
+Plots.plot!(p3, svm_x, svm_y, label = "", width=3)
