Merge pull request #159 from jump-dev/jg/examples
Fix and simplify examples
matbesancon committed Dec 2, 2021
2 parents 0ab7e17 + 82ec6be commit ebee185
Showing 2 changed files with 64 additions and 89 deletions.
1 change: 1 addition & 0 deletions docs/Project.toml
@@ -9,3 +9,4 @@ OSQP = "ab2f91bb-94b4-55e3-9ba0-7f65df51de79"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
SCS = "c946c3f1-0d1f-5ce8-9dea-7daa1f7e2d13"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
152 changes: 63 additions & 89 deletions docs/src/examples/sensitivity-analysis-svm.jl
@@ -11,55 +11,61 @@
# ```math
# \begin{split}
# \begin{array} {ll}
-# \mbox{minimize} & \sum_{i=1}^{N} \xi_{i} \\
-# \mbox{s.t.} & \xi_{i} \ge 0 \quad i=1..N \\
-# & y_{i} (w^T X_{i} + b) \ge 1 - \xi[i]\\
+# \mbox{minimize} & \lambda||w||^2 + \sum_{i=1}^{N} \xi_{i} \\
+# \mbox{s.t.} & \xi_{i} \ge 0 \quad \quad i=1..N \\
+# & y_{i} (w^T X_{i} + b) \ge 1 - \xi_{i} \quad i=1..N \\
# \end{array}
# \end{split}
# ```
# where
# - `X`, `y` are the `N` data points
# - `w` is the support vector
# - `b` determines the offset `b/||w||` of the hyperplane with normal `w`
# - `ξ` is the soft-margin loss.
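At the optimum the slacks are tight, that is ξᵢ = max(0, 1 - yᵢ(wᵀXᵢ + b)), so the regularized objective can be evaluated directly for any candidate hyperplane. A minimal sketch of that evaluation (the helper `soft_margin_loss` is ours, not part of the example):

```julia
using LinearAlgebra

# Soft-margin objective for a candidate hyperplane (w, b); at the optimum
# of the program above, ξᵢ = max(0, 1 - yᵢ(wᵀXᵢ + b)).
function soft_margin_loss(w, b, X, y, λ)
    margins = y .* (X * w .+ b)  # yᵢ(wᵀXᵢ + b) for every data point
    return λ * dot(w, w) + sum(max.(0.0, 1 .- margins))
end
```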

-# ## Define and solve the SVM

-# Import the libraries.

+# This tutorial uses the following packages

-using SCS, DiffOpt, LinearAlgebra, JuMP
-import Random, Plots
+using JuMP # The mathematical programming modelling language
+import DiffOpt # JuMP extension for differentiable optimization
+import Ipopt # Optimization solver that handles quadratic programs
+import Plots # Graphing tool
+import LinearAlgebra: dot, norm, normalize!
+import Random

+# ## Define and solve the SVM

# Construct separable, non-trivial data points.

N = 100
D = 2
Random.seed!(62)
-X = vcat(randn(N ÷ 2, D), randn(N ÷ 2, D) .+ [4.0, 1.5]')
-y = append!(ones(N ÷ 2), -ones(N ÷ 2));
+X = vcat(randn(N ÷ 2, D), randn(N ÷ 2, D) .+ [4.5, 2.0]')
+y = append!(ones(N ÷ 2), -ones(N ÷ 2))
+λ = 0.05;


# Let's initialize a special model that can understand sensitivities

-model = Model(() -> diff_optimizer(SCS.Optimizer))
+model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
MOI.set(model, MOI.Silent(), true)

# Add the variables

-@variable(model, l[1:N])
+@variable(model, ξ[1:N])
@variable(model, w[1:D])
@variable(model, b);

# Add the constraints.

@constraint(
    model,
-    1.0 * l in MOI.Nonnegatives(N),
-)
+    [i in 1:N],
+    ξ[i] >= 0
+);
@constraint(
    model,
-    cons,
-    y .* (X * w .+ b) + l .- 1 in MOI.Nonnegatives(N),
+    cons[i in 1:N],
+    y[i] * (dot(X[i, :], w) + b) >= 1 - ξ[i]
);
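The N scalar constraints named `cons` are equivalent to the single vectorized conic constraint used by the previous SCS-based version of this example. A sketch of that form, on a throwaway model built only for illustration (`model2`, `ξ2`, `w2`, and `b2` are ours):

```julia
model2 = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
@variable(model2, ξ2[1:N] >= 0)
@variable(model2, w2[1:D])
@variable(model2, b2)
# One vector-in-cone constraint in place of the N scalar inequalities:
@constraint(model2, y .* (X * w2 .+ b2) + ξ2 .- 1 in MOI.Nonnegatives(N))
```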


@@ -68,19 +74,21 @@ MOI.set(model, MOI.Silent(), true)
@objective(
model,
Min,
-    sum(l),
+    λ * dot(w, w) + sum(ξ),
)

-optimize!(model) # solve
+optimize!(model)
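Before differentiating, it is worth confirming that the solve succeeded; this check is an addition of ours, not part of the example:

```julia
# Ipopt is a local NLP solver, so it reports LOCALLY_SOLVED on success.
@assert termination_status(model) in (MOI.OPTIMAL, MOI.LOCALLY_SOLVED)
```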


# We can visualize the separating hyperplane.

loss = objective_value(model)

wv = value.(w)

bv = value(b)

-svm_x = [0.0, 3.0]
+svm_x = [0.0, 5.0] # arbitrary points
svm_y = (-bv .- wv[1] * svm_x )/wv[2]
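The plotted segment is the zero level set of the classifier: solving `w₁x₁ + w₂x₂ + b = 0` for the second coordinate gives

```math
x_2 = \frac{-b - w_1 x_1}{w_2},
```

which is exactly what `svm_y` computes at the two `svm_x` values.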

p = Plots.scatter(X[:,1], X[:,2], color = [yi > 0 ? :red : :blue for yi in y], label = "")
@@ -94,62 +102,29 @@ Plots.plot!(p, svm_x, svm_y, label = "loss = $(round(loss, digits=2))", width=3)
# - How does a change in the labels of the data points (`y=1` to `y=-1`, and vice versa) affect the position of the hyperplane? This is answered by finding the gradient of `w` and `b` with respect to `y[i]`, the classification label of the i-th data point.
# - How does a change in the coordinates of the data points `X` affect the position of the hyperplane? This is answered by finding the gradient of `w` and `b` with respect to `X[i]`, the 2D coordinates of the i-th data point.

-# Note that finding the optimal SVM can be modelled as a conic optimization problem:
-
-# ```math
-# \begin{align*}
-# & \min_{x \in \mathbb{R}^n} & c^T x \\
-# & \text{s.t.} & A x + s = b \\
-# & & b \in \mathbb{R}^m \\
-# & & s \in \mathcal{K}
-# \end{align*}
-# ```
-
-# where
-# ```math
-# \begin{align*}
-# c &= [l_1 - 1, l_2 - 1, \ldots, l_N - 1, 0, 0, \ldots, 0 \text{ (D+1 times)}] \\\\
-# A &=
-# \begin{bmatrix}
-# -l_1 & 0 & \ldots & 0 & 0 & \ldots & 0 & 0 \\
-# 0 & -l_2 & \ldots & 0 & 0 & \ldots & 0 & 0 \\
-# \vdots & \vdots & \ddots & \vdots & 0 & \ldots & 0 & 0 \\
-# 0 & 0 & \ldots & -l_N & 0 & \ldots & 0 & 0 \\
-# 0 & 0 & \ldots & 0 & -y_1 X_{1,1} & \ldots & -y_1 X_{1,D} & -y_1 \\
-# 0 & 0 & \ldots & 0 & -y_2 X_{2,1} & \ldots & -y_2 X_{2,D} & -y_2 \\
-# \vdots & \vdots & \ddots & \vdots & \vdots & \ddots & \vdots & \vdots \\
-# 0 & 0 & \ldots & 0 & -y_N X_{N,1} & \ldots & -y_N X_{N,D} & -y_N \\
-# \end{bmatrix} \\\\
-# b &= [0, 0, \ldots, 0 \text{ (N times)}, l_1 - 1, l_2 - 1, \ldots, l_N - 1] \\\\
-# \mathcal{K} &= \text{Set of nonnegative cones}
-# \end{align*}
-# ```


# ## Experiment 1: Gradient of hyperplane wrt the data point labels

# Construct perturbations in data point labels `y` without changing the data point coordinates `X`.
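Both experiments below follow the same three-step forward-mode recipe; a comment-level sketch (the step numbering and the symbol θ are ours):

```julia
# 1. For each constraint, set the derivative of its function with respect
#    to the scalar parameter θ being perturbed:
#        MOI.set(model, DiffOpt.ForwardInConstraint(), cons[j], dfⱼ/dθ)
# 2. Propagate the perturbation through the optimality conditions:
#        DiffOpt.forward(model)
# 3. Read off the sensitivity of any primal variable:
#        MOI.get(model, DiffOpt.ForwardOutVariablePrimal(), w[k])
```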


-∇ = Float64[]
+∇ = zeros(N)
dy = zeros(N);

-# begin differentiating
-for Xi in 1:N
-    dy[Xi] = 1.0 # set
-    MOI.set(
-        model,
-        DiffOpt.ForwardInConstraint(),
-        cons,
-        MOI.Utilities.vectorize(dy .* index(b)),
-    )
+# Begin differentiating the model,
+# analogous to varying θ in the expression:
+# ```math
+# (y_{i} + \theta) (w^T X_{i} + b) \ge 1 - \xi_{i}
+# ```
+for i in 1:N
+    dy[i] = 1.0 # set
+    for j in 1:N
+        MOI.set(
+            model,
+            DiffOpt.ForwardInConstraint(),
+            cons[j],
+            dy[j] * (dot(X[j, :], index.(w)) + index(b)),
+        )
+    end
    DiffOpt.forward(model)

dw = MOI.get.(
model,
DiffOpt.ForwardOutVariablePrimal(),
@@ -160,9 +135,8 @@ for Xi in 1:N
DiffOpt.ForwardOutVariablePrimal(),
b,
)
-    push!(∇, norm(dw) + norm(db))
-    dy[Xi] = 0.0 # reset the change made above
+    ∇[i] = norm(dw) + norm(db)
+    dy[i] = 0.0 # reset the change made at the beginning of the loop
end

normalize!(∇);
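The normalized entries of `∇` rank the data points by influence; for example, the single most label-sensitive point can be read off directly (a small usage sketch of ours):

```julia
i_star = argmax(∇)       # index of the most label-sensitive data point
X[i_star, :], y[i_star]  # its coordinates and label
```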
@@ -173,34 +147,35 @@ normalize!(∇);
p2 = Plots.scatter(
X[:,1], X[:,2],
color = [yi > 0 ? :red : :blue for yi in y], label = "",
-    markersize = ∇ * 20,
+    markersize = 20 * max.(∇, 0.1 * maximum(∇)),
)
Plots.yaxis!(p2, (-2, 4.5))
-Plots.plot!(p2, svm_x, svm_y, label = "loss = $(round(loss, digits=2))", width=3)
+Plots.plot!(p2, svm_x, svm_y, label = "", width=3)


# ## Experiment 2: Gradient of hyperplane wrt the data point coordinates

# Similar to the previous experiment, construct perturbations in the data point coordinates `X`.

-∇ = Float64[]
+∇ = zeros(N)
dX = zeros(N, D);

-# begin differentiating
-for Xi in 1:N
-    dX[Xi, :] = ones(D) # set
-    for i in 1:D
+# Begin differentiating the model,
+# analogous to varying θ in the expression:
+# ```math
+# y_{i} (w^T (X_{i} + \theta) + b) \ge 1 - \xi_{i}
+# ```
+for i in 1:N
+    dX[i, :] = ones(D) # set
+    for j in 1:N
        MOI.set(
            model,
            DiffOpt.ForwardInConstraint(),
-            cons,
-            MOI.Utilities.vectorize(dX[:, i] .* index(w[i])),
+            cons[j],
+            y[j] * dot(dX[j, :], index.(w)),
        )
    end

    DiffOpt.forward(model)

dw = MOI.get.(
model,
DiffOpt.ForwardOutVariablePrimal(),
@@ -211,9 +186,8 @@
DiffOpt.ForwardOutVariablePrimal(),
b,
)
-    push!(∇, norm(dw) + norm(db))
-    dX[Xi, :] = zeros(D) # reset the change made at the beginning of the loop
+    ∇[i] = norm(dw) + norm(db)
+    dX[i, :] = zeros(D) # reset the change made at the beginning of the loop
end

normalize!(∇);
@@ -223,7 +197,7 @@ normalize!(∇);
p3 = Plots.scatter(
X[:,1], X[:,2],
color = [yi > 0 ? :red : :blue for yi in y], label = "",
-    markersize = ∇ * 20,
+    markersize = 20 * max.(∇, 0.1 * maximum(∇)),
)
Plots.yaxis!(p3, (-2, 4.5))
-Plots.plot!(p3, svm_x, svm_y, label = "loss = $(round(loss, digits=2))", width=3)
+Plots.plot!(p3, svm_x, svm_y, label = "", width=3)
