Commit: multiple updates

jumutc committed Jul 8, 2015
1 parent c78f2c7 commit 74e0c13
Showing 14 changed files with 195 additions and 92 deletions.
7 changes: 4 additions & 3 deletions src/SALSA.jl
@@ -2,7 +2,7 @@ module SALSA

export salsa, mapstd, make_sparse, entropysubset, AFEm, gen_cross_validate

using MLBase, Distributions, Compat
using MLBase, Distributions, Compat, Distances


# Calculate the misclassification rate
@@ -12,7 +12,7 @@ sse(y, yhat) = norm(y-yhat)^2
# Calculates the average squared difference between the corresponding elements of two vectors
mse(y, yhat) = sse(y, yhat)/length(yhat)
# Area Under ROC curve with latent output y
auc(y, ylat; n=100) = auc(roc(int(y)[:], ylat[:], n))
auc(y, ylat; n=100) = std(ylat[:]) == 0 ? 0.0 : auc(roc(int(y)[:], ylat[:], n))
# provide a convenient function for parallelizing cross-validation
nfolds() = if nworkers() == 1 || nworkers() > 10 10 else nworkers() end
# helper function for AUC calculus
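
The guarded auc above short-circuits when the latent output has zero variance, presumably because roc from MLBase yields a degenerate curve for constant scores. A standalone sketch of that case (values illustrative):

y    = [1, -1, 1, -1]      # labels
ylat = zeros(4)            # constant latent output
std(ylat) == 0.0           # => true, so the new auc(y, ylat) returns 0.0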
@@ -45,9 +45,10 @@ include(joinpath("algorithms", "l1rda_alg.jl"))
include(joinpath("algorithms", "adaptive_l1rda_alg.jl"))
include(joinpath("algorithms", "reweighted_l1rda_alg.jl"))
include(joinpath("algorithms", "reweighted_l2rda_alg.jl"))
include(joinpath("algorithms", "stochastic_ppc.jl"))
include(joinpath("algorithms", "stochastic_rk_means.jl"))
include(joinpath("algorithms", "pegasos_alg.jl"))
include(joinpath("algorithms", "dropout_alg.jl"))
include(joinpath("algorithms", "sgd_alg.jl"))
# tuning + validation
include("run_algorithm.jl")
include("validation_criteria.jl")
43 changes: 23 additions & 20 deletions src/SALSAModel.jl
@@ -1,6 +1,6 @@
export SALSAModel,
HINGE, PINBALL, LOGISTIC, LEAST_SQUARES,
PEGASOS, L1RDA, ADA_L1RDA, R_L1RDA, R_L2RDA, DROP_OUT, RDA, SGD, PPC,
HINGE, PINBALL, LOGISTIC, LEAST_SQUARES, MODIFIED_HUBER,
SIMPLE_SGD, PEGASOS, L1RDA, ADA_L1RDA, R_L1RDA, R_L2RDA, DROP_OUT, RDA, SGD, RK_MEANS,
LINEAR, NONLINEAR

abstract Model
@@ -9,6 +9,7 @@ abstract Loss
abstract NonParametricLoss <: Loss
immutable HINGE <: NonParametricLoss end
immutable LOGISTIC <: NonParametricLoss end
immutable MODIFIED_HUBER <: NonParametricLoss end
immutable LEAST_SQUARES <: NonParametricLoss end
immutable PINBALL <: Loss end
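
MODIFIED_HUBER is only declared as a type in this diff; assuming it denotes the classical modified Huber (smoothly clipped squared hinge) loss of Zhang (2004), a one-line sketch for labels y in {-1, +1} would be:

# Assumed definition -- the formula is not part of this commit:
modified_huber(y, f) = y*f >= -1 ? max(0, 1 - y*f)^2 : -4*y*f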

@@ -21,8 +22,9 @@ immutable R_L1RDA <: RDA end
immutable R_L2RDA <: RDA end
immutable ADA_L1RDA <: RDA end
immutable DROP_OUT <: SGD end
# special algorithm type for Proximal Plane Clustering
immutable PPC{A <: Algorithm} <: Algorithm
immutable SIMPLE_SGD <: SGD end
# special algorithm type for Regularized K-Means
immutable RK_MEANS{A <: Algorithm} <: Algorithm
support_alg::Type{A}
k_clusters::Int
max_iter::Int
@@ -61,7 +63,7 @@ type SALSAModel{L <: Loss, A <: Algorithm,
max_iter::Int
max_cv_k::Int
max_k::Int
online_pass::Bool
online_pass::Int
normalized::Bool
tolerance::Float64
sparsity_cv::Float64
@@ -74,20 +76,21 @@ end

# outer constructor to ease instantiation of a SALSAModel
SALSAModel{L <: Loss, A <: Algorithm, M <: Mode, K <: Kernel}(
mode::Type{M}, alg::A,
loss_function::Type{L};
kernel::Type{K} = RBFKernel,
global_opt::GlobalOpt = CSA(),
subset_size::Float64 = 5e-1,
max_cv_iter::Int = 1000,
max_iter::Int = 1000,
max_cv_k::Int = 1,
max_k::Int = 1,
online_pass::Bool = false,
normalized::Bool = true,
tolerance::Float64 = 1e-5,
sparsity_cv::Float64 = 2e-2,
validation_criteria = MISCLASS(),
mode::Type{M}, # mode used to learn model: LINEAR vs. NONLINEAR
algorithm::A, # algorithm used to learn the model, e.g. PEGASOS
loss_function::Type{L}; # type of a loss function used to learn model, e.g. HINGE
kernel::Type{K} = RBFKernel, # kernel used in NONLINEAR mode to compute Nystrom approx.
global_opt::GlobalOpt = CSA(), # global optimization techniques for tuning hyperparameters
subset_size::Float64 = 5e-1, # subset size used in NONLINEAR mode to compute Nystrom approx.
max_cv_iter::Int = 1000, # maximal number of iterations (budget) for any algorithm in training CV
max_iter::Int = 1000, # maximal number of iterations (budget) for any algorithm for final training
max_cv_k::Int = 1, # maximal number of data points used to compute loss derivative in training CV
max_k::Int = 1, # maximal number of data points used to compute loss derivative for final training
online_pass::Int = 0, # if > 0, run in the online setting, passing through the entire dataset <online_pass> times
normalized::Bool = true, # normalize data (subtract the mean and divide by the std) before CV and final learning
tolerance::Float64 = 1e-5, # early-stopping criterion ||w_{t+1} - w_t|| <= tolerance, checked only when online_pass == 0
sparsity_cv::Float64 = 2e-2, # sparsity affinity complement to any validation_criteria for CV, used in RDA-type algorithms
validation_criteria = MISCLASS(), # validation criteria used to verify the generalization capabilities of the model in CV
cv_gen = @compat Nullable{CrossValGenerator}()) =
SALSAModel(mode,alg,kernel,loss_function,global_opt,subset_size,max_cv_iter,max_iter,max_cv_k,
SALSAModel(mode,algorithm,kernel,loss_function,global_opt,subset_size,max_cv_iter,max_iter,max_cv_k,
max_k,online_pass,normalized,tolerance,sparsity_cv,validation_criteria,cv_gen,OutputModel{mode}())
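
A minimal usage sketch of the revised outer constructor with the renamed algorithm argument and the integer-valued online_pass (keyword values here are illustrative; only the names come from this diff):

# one full online sweep over the data with the new SIMPLE_SGD solver
model = SALSAModel(LINEAR, SIMPLE_SGD(), HINGE,
                   online_pass = 1,     # > 0 switches the solvers to cyclic online sampling
                   normalized  = true)  # subtract mean and divide by std before training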
13 changes: 8 additions & 5 deletions src/algorithms/adaptive_l1rda_alg.jl
@@ -1,7 +1,7 @@
export adaptive_l1rda_alg

function adaptive_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ::Float64,
k::Int, max_iter::Int, tolerance::Float64, online_pass=false, train_idx=[])
k::Int, max_iter::Int, tolerance::Float64, online_pass=0, train_idx=[])

# Internal function for a simple Adaptive l1-RDA routine
#
@@ -32,9 +32,12 @@ function adaptive_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ:
space = 1:1:N
end

if online_pass
max_iter = N
smpl = (t,k) -> t
if online_pass > 0
max_iter = N*online_pass
smpl = (t,k) -> begin
s = t % N
s > 0 ? s : N
end
else
pd = Categorical(N)
smpl = (t,k) -> rand(pd,k)
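
The same sampling switch recurs in every solver touched by this commit: with online_pass > 0 the sampler now walks the data cyclically in 1-based index space, so N*online_pass iterations perform exactly online_pass full sweeps. A quick standalone check of the wrap-around:

N = 3
smpl(t) = (s = t % N; s > 0 ? s : N)   # same modulo trick as above, minibatch arg omitted
[smpl(t) for t = 1:6]                  # => [1, 2, 3, 1, 2, 3], never 0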
@@ -67,7 +70,7 @@ function adaptive_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ:
end

# check the stopping criteria w.r.t. Tolerance, check, online_pass
if ~online_pass && check && vecnorm(w - w_prev) < tolerance
if online_pass == 0 && check && vecnorm(w - w_prev) < tolerance
break
end
end
13 changes: 8 additions & 5 deletions src/algorithms/dropout_alg.jl
@@ -1,6 +1,6 @@
export dropout_alg

function dropout_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int, tolerance::Float64, online_pass=false, train_idx=[])
function dropout_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int, tolerance::Float64, online_pass=0, train_idx=[])
# Internal function for a simple Dropout Pegasos routine
#
# Copyright (c) 2015, KU Leuven-ESAT-STADIUS, License & help @
@@ -28,9 +28,12 @@ function dropout_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int,
space = 1:1:N
end

if online_pass
max_iter = N
smpl = (t,k) -> t
if online_pass > 0
max_iter = N*online_pass
smpl = (t,k) -> begin
s = t % N
s > 0 ? s : N
end
else
pd = Categorical(N)
smpl = (t,k) -> rand(pd,k)
@@ -63,7 +66,7 @@ function dropout_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int,
end

# check the stopping criteria w.r.t. Tolerance, check, online_pass
if ~online_pass && ~check && vecnorm(w - w_prev) < tolerance
if online_pass == 0 && ~check && vecnorm(w - w_prev) < tolerance
break
end
end
13 changes: 8 additions & 5 deletions src/algorithms/l1rda_alg.jl
@@ -1,7 +1,7 @@
export l1rda_alg

function l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ::Float64,
k::Int, max_iter::Int, tolerance::Float64, online_pass=false, train_idx=[])
k::Int, max_iter::Int, tolerance::Float64, online_pass=0, train_idx=[])

# Internal function for a simple l1-RDA routine
#
@@ -30,9 +30,12 @@ function l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ::Float64,
space = 1:1:N
end

if online_pass
max_iter = N
smpl = (t,k) -> t
if online_pass > 0
max_iter = N*online_pass
smpl = (t,k) -> begin
s = t % N
s > 0 ? s : N
end
else
pd = Categorical(N)
smpl = (t,k) -> rand(pd,k)
@@ -62,7 +65,7 @@ function l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ::Float64,
end

# check the stopping criteria w.r.t. Tolerance, check, online_pass
if ~online_pass && check && vecnorm(w - w_prev) < tolerance
if online_pass == 0 && check && vecnorm(w - w_prev) < tolerance
break
end
end
15 changes: 9 additions & 6 deletions src/algorithms/pegasos_alg.jl
@@ -1,6 +1,6 @@
export pegasos_alg

function pegasos_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int, tolerance::Float64, online_pass=false, train_idx=[])
function pegasos_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int, tolerance::Float64, online_pass=0, train_idx=[])
# Internal function for a simple Pegasos routine
#
# Copyright (c) 2015, KU Leuven-ESAT-STADIUS, License & help @
@@ -28,9 +28,12 @@ function pegasos_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int,
space = 1:1:N
end

if online_pass
max_iter = N
smpl = (t,k) -> t
if online_pass > 0
max_iter = N*online_pass
smpl = (t,k) -> begin
s = t % N
s > 0 ? s : N
end
else
pd = Categorical(N)
smpl = (t,k) -> rand(pd,k)
@@ -46,12 +49,12 @@ function pegasos_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int,
# do a gradient descent step
η_t = 1/(λ*t)
w = (1 - η_t*λ).*w
w = w - (η_t/k).*dfunc(At,yt,w_prev)
w = w - (η_t/k).*dfunc(At,yt,w_prev)#*class_weight[yt]
# project back to the set B: w \in convex set B
w = min(1,1/(sqrt(λ)*vecnorm(w))).*w

# check the stopping criteria w.r.t. Tolerance, check, online_pass
if ~online_pass && ~check && vecnorm(w - w_prev) < tolerance
if online_pass == 0 && ~check && vecnorm(w - w_prev) < tolerance
break
end
end
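
The min(1, ...) rescaling above is the standard Pegasos projection onto the ball of radius 1/sqrt(λ), which contains the optimum of the λ-strongly convex objective. A standalone numeric check (sketch):

λ = 0.1
w = 10*randn(5)                        # start deliberately outside the ball
w = min(1, 1/(sqrt(λ)*norm(w))).*w     # the projection step from pegasos_alg
norm(w) <= 1/sqrt(λ) + 1e-12           # => true after projection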
13 changes: 8 additions & 5 deletions src/algorithms/reweighted_l1rda_alg.jl
@@ -1,7 +1,7 @@
export reweighted_l1rda_alg

function reweighted_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ::Float64, ɛ::Float64,
k::Int, max_iter::Int, tolerance::Float64, online_pass=false, train_idx=[])
k::Int, max_iter::Int, tolerance::Float64, online_pass=0, train_idx=[])

# Internal function for a simple reweighted l1-RDA routine
#
@@ -32,9 +32,12 @@ function reweighted_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64,
space = 1:1:N
end

if online_pass
max_iter = N
smpl = (t,k) -> t
if online_pass > 0
max_iter = N*online_pass
smpl = (t,k) -> begin
s = t % N
s > 0 ? s : N
end
else
pd = Categorical(N)
smpl = (t,k) -> rand(pd,k)
@@ -68,7 +71,7 @@ function reweighted_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64,
end

# check the stopping criteria w.r.t. Tolerance, check, online_pass
if ~online_pass && check && vecnorm(w - w_prev) < tolerance
if online_pass == 0 && check && vecnorm(w - w_prev) < tolerance
break
end
end
13 changes: 8 additions & 5 deletions src/algorithms/reweighted_l2rda_alg.jl
@@ -1,7 +1,7 @@
export reweighted_l2rda_alg

function reweighted_l2rda_alg(dfunc::Function, X, Y, λ::Float64, ɛ::Float64, varɛ::Float64,
k::Int, max_iter::Int, tolerance::Float64, online_pass=false, train_idx=[])
k::Int, max_iter::Int, tolerance::Float64, online_pass=0, train_idx=[])

# Internal function for a simple Reweighted l2-RDA routine
#
@@ -32,9 +32,12 @@ function reweighted_l2rda_alg(dfunc::Function, X, Y, λ::Float64, ɛ::Float64, v
space = 1:1:N
end

if online_pass
max_iter = N
smpl = (t,k) -> t
if online_pass > 0
max_iter = N*online_pass
smpl = (t,k) -> begin
s = t % N
s > 0 ? s : N
end
else
pd = Categorical(N)
smpl = (t,k) -> rand(pd,k)
@@ -63,7 +66,7 @@ function reweighted_l2rda_alg(dfunc::Function, X, Y, λ::Float64, ɛ::Float64, v
end

# check the stopping criteria w.r.t. Tolerance, check, online_pass
if ~online_pass && check && vecnorm(w - w_prev) < tolerance
if online_pass == 0 && check && vecnorm(w - w_prev) < tolerance
break
end
end
59 changes: 59 additions & 0 deletions src/algorithms/sgd_alg.jl
@@ -0,0 +1,59 @@
export sgd_alg

function sgd_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int, tolerance::Float64, online_pass=0, train_idx=[])
# Internal function for a simple SGD routine for λ-strongly convex functions
#
# Copyright (c) 2015, KU Leuven-ESAT-STADIUS, License & help @
# http://www.esat.kuleuven.be/stadius/ADB/jumutc/softwareSALSA.php

N = size(X,1)
d = size(X,2) + 1
check = issparse(X)

if ~check
w = rand(d)
A = [X'; ones(1,N)]
else
total = length(X.nzval)
w = sprand(d,1,total/(N*d))
A = [X'; sparse(ones(1,N))]
end

if ~isempty(train_idx)
space = train_idx
N = size(space,1)
else
space = 1:1:N
end

if online_pass > 0
max_iter = N*online_pass
smpl = (t,k) -> begin
s = t % N
s > 0 ? s : N
end
else
pd = Categorical(N)
smpl = (t,k) -> rand(pd,k)
end

for t=1:max_iter
idx = space[smpl(t,k)]
w_prev = w

yt = Y[idx]
At = A[:,idx]

# do a gradient descent step
η_t = 1/(λ*t)
w = (1 - λ*η_t).*w
w = w - (η_t/k).*dfunc(At,yt,w_prev)

# check the stopping criteria w.r.t. Tolerance, check, online_pass
if online_pass == 0 && ~check && vecnorm(w - w_prev) < tolerance
break
end
end

w[1:end-1,:], w[end,:]
end
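
A standalone sketch of driving the new solver directly; the least-squares derivative and the synthetic data are illustrative assumptions, and only sgd_alg's signature comes from this file:

using Distributions    # sgd_alg draws minibatch indices via Categorical

# derivative of 0.5*||At'w - yt||^2 w.r.t. w; At is d x k with bias-augmented
# sample columns, yt is the k-vector of targets
dfunc(At, yt, w) = At*(vec(At'*w) - yt)

X = randn(100, 5)                    # 100 samples, 5 features
Y = X*randn(5) + 0.01*randn(100)     # noisy linear targets
w, b = sgd_alg(dfunc, X, Y, 0.1, 5, 1000, 1e-5)   # λ = 0.1, minibatch k = 5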
