Skip to content

Commit

Permalink
data wrapper is added
Browse files Browse the repository at this point in the history
  • Loading branch information
jumutc committed Jul 16, 2015
1 parent 0942edb commit 768c1a1
Show file tree
Hide file tree
Showing 20 changed files with 264 additions and 42 deletions.
4 changes: 3 additions & 1 deletion REQUIRE
Expand Up @@ -2,4 +2,6 @@ MAT
MLBase
Compat
Distances
Distributions
Distributions
DataArrays
DataFrames
150 changes: 150 additions & 0 deletions data/iris.data.csv
@@ -0,0 +1,150 @@
5.1,3.5,1.4,0.2,1
4.9,3,1.4,0.2,1
4.7,3.2,1.3,0.2,1
4.6,3.1,1.5,0.2,1
5,3.6,1.4,0.2,1
5.4,3.9,1.7,0.4,1
4.6,3.4,1.4,0.3,1
5,3.4,1.5,0.2,1
4.4,2.9,1.4,0.2,1
4.9,3.1,1.5,0.1,1
5.4,3.7,1.5,0.2,1
4.8,3.4,1.6,0.2,1
4.8,3,1.4,0.1,1
4.3,3,1.1,0.1,1
5.8,4,1.2,0.2,1
5.7,4.4,1.5,0.4,1
5.4,3.9,1.3,0.4,1
5.1,3.5,1.4,0.3,1
5.7,3.8,1.7,0.3,1
5.1,3.8,1.5,0.3,1
5.4,3.4,1.7,0.2,1
5.1,3.7,1.5,0.4,1
4.6,3.6,1,0.2,1
5.1,3.3,1.7,0.5,1
4.8,3.4,1.9,0.2,1
5,3,1.6,0.2,1
5,3.4,1.6,0.4,1
5.2,3.5,1.5,0.2,1
5.2,3.4,1.4,0.2,1
4.7,3.2,1.6,0.2,1
4.8,3.1,1.6,0.2,1
5.4,3.4,1.5,0.4,1
5.2,4.1,1.5,0.1,1
5.5,4.2,1.4,0.2,1
4.9,3.1,1.5,0.1,1
5,3.2,1.2,0.2,1
5.5,3.5,1.3,0.2,1
4.9,3.1,1.5,0.1,1
4.4,3,1.3,0.2,1
5.1,3.4,1.5,0.2,1
5,3.5,1.3,0.3,1
4.5,2.3,1.3,0.3,1
4.4,3.2,1.3,0.2,1
5,3.5,1.6,0.6,1
5.1,3.8,1.9,0.4,1
4.8,3,1.4,0.3,1
5.1,3.8,1.6,0.2,1
4.6,3.2,1.4,0.2,1
5.3,3.7,1.5,0.2,1
5,3.3,1.4,0.2,1
7,3.2,4.7,1.4,2
6.4,3.2,4.5,1.5,2
6.9,3.1,4.9,1.5,2
5.5,2.3,4,1.3,2
6.5,2.8,4.6,1.5,2
5.7,2.8,4.5,1.3,2
6.3,3.3,4.7,1.6,2
4.9,2.4,3.3,1,2
6.6,2.9,4.6,1.3,2
5.2,2.7,3.9,1.4,2
5,2,3.5,1,2
5.9,3,4.2,1.5,2
6,2.2,4,1,2
6.1,2.9,4.7,1.4,2
5.6,2.9,3.6,1.3,2
6.7,3.1,4.4,1.4,2
5.6,3,4.5,1.5,2
5.8,2.7,4.1,1,2
6.2,2.2,4.5,1.5,2
5.6,2.5,3.9,1.1,2
5.9,3.2,4.8,1.8,2
6.1,2.8,4,1.3,2
6.3,2.5,4.9,1.5,2
6.1,2.8,4.7,1.2,2
6.4,2.9,4.3,1.3,2
6.6,3,4.4,1.4,2
6.8,2.8,4.8,1.4,2
6.7,3,5,1.7,2
6,2.9,4.5,1.5,2
5.7,2.6,3.5,1,2
5.5,2.4,3.8,1.1,2
5.5,2.4,3.7,1,2
5.8,2.7,3.9,1.2,2
6,2.7,5.1,1.6,2
5.4,3,4.5,1.5,2
6,3.4,4.5,1.6,2
6.7,3.1,4.7,1.5,2
6.3,2.3,4.4,1.3,2
5.6,3,4.1,1.3,2
5.5,2.5,4,1.3,2
5.5,2.6,4.4,1.2,2
6.1,3,4.6,1.4,2
5.8,2.6,4,1.2,2
5,2.3,3.3,1,2
5.6,2.7,4.2,1.3,2
5.7,3,4.2,1.2,2
5.7,2.9,4.2,1.3,2
6.2,2.9,4.3,1.3,2
5.1,2.5,3,1.1,2
5.7,2.8,4.1,1.3,2
6.3,3.3,6,2.5,3
5.8,2.7,5.1,1.9,3
7.1,3,5.9,2.1,3
6.3,2.9,5.6,1.8,3
6.5,3,5.8,2.2,3
7.6,3,6.6,2.1,3
4.9,2.5,4.5,1.7,3
7.3,2.9,6.3,1.8,3
6.7,2.5,5.8,1.8,3
7.2,3.6,6.1,2.5,3
6.5,3.2,5.1,2,3
6.4,2.7,5.3,1.9,3
6.8,3,5.5,2.1,3
5.7,2.5,5,2,3
5.8,2.8,5.1,2.4,3
6.4,3.2,5.3,2.3,3
6.5,3,5.5,1.8,3
7.7,3.8,6.7,2.2,3
7.7,2.6,6.9,2.3,3
6,2.2,5,1.5,3
6.9,3.2,5.7,2.3,3
5.6,2.8,4.9,2,3
7.7,2.8,6.7,2,3
6.3,2.7,4.9,1.8,3
6.7,3.3,5.7,2.1,3
7.2,3.2,6,1.8,3
6.2,2.8,4.8,1.8,3
6.1,3,4.9,1.8,3
6.4,2.8,5.6,2.1,3
7.2,3,5.8,1.6,3
7.4,2.8,6.1,1.9,3
7.9,3.8,6.4,2,3
6.4,2.8,5.6,2.2,3
6.3,2.8,5.1,1.5,3
6.1,2.6,5.6,1.4,3
7.7,3,6.1,2.3,3
6.3,3.4,5.6,2.4,3
6.4,3.1,5.5,1.8,3
6,3,4.8,1.8,3
6.9,3.1,5.4,2.1,3
6.7,3.1,5.6,2.4,3
6.9,3.1,5.1,2.3,3
5.8,2.7,5.1,1.9,3
6.8,3.2,5.9,2.3,3
6.7,3.3,5.7,2.5,3
6.7,3,5.2,2.3,3
6.3,2.5,5,1.9,3
6.5,3,5.2,2,3
6.2,3.4,5.4,2.3,3
5.9,3,5.1,1.8,3
12 changes: 7 additions & 5 deletions src/SALSA.jl
@@ -1,8 +1,9 @@
module SALSA

export salsa, mapstd, make_sparse, entropysubset, AFEm, gen_cross_validate
export salsa, mapstd, make_sparse, entropy_subset, AFEm, gen_cross_validate

using MLBase, Distributions, Compat, Distances
import Base: size, getindex, issparse, sub


# Calculate the misclassification rate
Expand All @@ -29,7 +30,8 @@ end
# needed support files
include(joinpath("kernels", "kernels.jl"))
include(joinpath("support", "constants.jl"))
include(joinpath("support", "entropysubset.jl"))
include(joinpath("support", "entropy_subset.jl"))
include(joinpath("support", "data_wrapper.jl"))
include(joinpath("support", "sparse.jl"))
include(joinpath("support", "mapstd.jl"))
include(joinpath("support", "AFEm.jl"))
Expand Down Expand Up @@ -79,7 +81,7 @@ function gen_cross_validate(evalfun::Function, X, Y, model::SALSAModel)
indices = get(model.cv_gen, Kfold(length(Y),nfolds()))
@parallel (+) for train_idx in collect(indices)
val_idx = setdiff(1:length(Y), train_idx)
evalfun(X[train_idx,:], Y[train_idx], X[val_idx,:], Y[val_idx])/nfolds()
evalfun(sub(X,train_idx,:), Y[train_idx], sub(X,val_idx,:), Y[val_idx])/nfolds()
end
end

Expand Down Expand Up @@ -123,9 +125,9 @@ function salsa(X, Y, model::SALSAModel, Xtest)
model.output = OutputModel{model.mode}()
end

if model.normalized && isempty(Xtest)
if model.normalized && isempty(Xtest) && typeof(X) <: Array
(X, model.output.X_mean, model.output.X_std) = mapstd(X)
elseif model.normalized
elseif model.normalized && typeof(X) <: Array
(X, model.output.X_mean, model.output.X_std) = mapstd(X)
Xtest = mapstd(Xtest,model.output.X_mean,model.output.X_std)
end
Expand Down
7 changes: 4 additions & 3 deletions src/algorithms/adaptive_l1rda_alg.jl
Expand Up @@ -16,13 +16,14 @@ function adaptive_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ:
g = zeros(d,1)
h = zeros(d,1)
w = rand(d,1)/100
A = [X'; ones(1,N)]
sub_arr = (I) -> [sub(X,I,:) ones(k,1)]'
else
g = spzeros(d,1)
h = spzeros(d,1)
total = length(X.nzval)
w = sprand(d,1,total/(N*d))/100
A = [X'; sparse(ones(1,N))]
X = [X'; sparse(ones(1,N))]
sub_arr = (I) -> X[:,I]
end

if ~isempty(train_idx)
Expand All @@ -48,7 +49,7 @@ function adaptive_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ:
w_prev = w

yt = Y[idx]
At = A[:,idx]
At = sub_arr(idx)

# calculate dual average: (cumulative) gradient
g_new = dfunc(At,yt,w)
Expand Down
7 changes: 4 additions & 3 deletions src/algorithms/dropout_alg.jl
Expand Up @@ -13,12 +13,13 @@ function dropout_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int,
if ~check
w = rand(d)
rw = ones(d)/d
A = [X'; ones(1,N)]
sub_arr = (I) -> [sub(X,I,:) ones(k,1)]'
else
total = length(X.nzval)
w = sprand(d,1,total/(N*d))
A = [X'; sparse(ones(1,N))]
X = [X'; sparse(ones(1,N))]
f_sample = (p) -> rand(Bernoulli(p^2/(1+p^2)))
sub_arr = (I) -> X[:,I]
end

if ~isempty(train_idx)
Expand All @@ -44,7 +45,7 @@ function dropout_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int,
w_prev = w

yt = Y[idx]
At = A[:,idx]
At = sub_arr(idx)

# define samples
if ~check
Expand Down
9 changes: 5 additions & 4 deletions src/algorithms/l1rda_alg.jl
Expand Up @@ -15,12 +15,13 @@ function l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ::Float64,
if check
g = zeros(d,1)
w = rand(d,1)/100
A = [X'; ones(1,N)]
sub_arr = (I) -> [sub(X,I,:) ones(k,1)]'
else
g = spzeros(d,1)
total = length(X.nzval)
w = sprand(d,1,total/(N*d))/100
A = [X'; sparse(ones(1,N))]
X = [X'; sparse(ones(1,N))]
sub_arr = (I) -> X[:,I]
end

if ~isempty(train_idx)
Expand All @@ -46,8 +47,8 @@ function l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64, ρ::Float64,
w_prev = w

yt = Y[idx]
At = A[:,idx]

At = sub_arr(idx)
# calculate dual average: gradient
g = ((t-1)/t).*g + (1/(t)).*dfunc(At,yt,w)
λ_rda = λ+(ρ*γ)/sqrt(t)
Expand Down
9 changes: 5 additions & 4 deletions src/algorithms/pegasos_alg.jl
Expand Up @@ -13,12 +13,13 @@ function pegasos_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int,
if ~check
w = rand(d)
w = w./(sqrt(λ)*vecnorm(w))
A = [X'; ones(1,N)]
sub_arr = (I) -> [sub(X,I,:) ones(k,1)]'
else
total = length(X.nzval)
w = sprand(d,1,total/(N*d))
w = w./(sqrt(λ)*vecnorm(w))
A = [X'; sparse(ones(1,N))]
X = [X'; sparse(ones(1,N))]
sub_arr = (I) -> X[:,I]
end

if ~isempty(train_idx)
Expand All @@ -44,12 +45,12 @@ function pegasos_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int,
w_prev = w

yt = Y[idx]
At = A[:,idx]
At = sub_arr(idx)

# do a gradient descent step
η_t = 1/(λ*t)
w = (1 - η_t*λ).*w
w = w - (η_t/k).*dfunc(At,yt,w_prev)#*class_weight[yt]
w = w - (η_t/k).*dfunc(At,yt,w_prev)
# project back to the set B: w \in convex set B
w = min(1,1/(sqrt(λ)*vecnorm(w))).*w

Expand Down
7 changes: 4 additions & 3 deletions src/algorithms/reweighted_l1rda_alg.jl
Expand Up @@ -17,12 +17,13 @@ function reweighted_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64,
g = zeros(d)
rw = ones(d)
w = rand(d,1)/100
A = [X'; ones(1,N)]
sub_arr = (I) -> [sub(X,I,:) ones(k,1)]'
else
g = spzeros(d,1)
total = length(X.nzval)
w = sprand(d,1,total/(N*d))/100
A = [X'; sparse(ones(1,N))]
X = [X'; sparse(ones(1,N))]
sub_arr = (I) -> X[:,I]
end

if ~isempty(train_idx)
Expand All @@ -48,7 +49,7 @@ function reweighted_l1rda_alg(dfunc::Function, X, Y, λ::Float64, γ::Float64,
w_prev = w

yt = Y[idx]
At = A[:,idx]
At = sub_arr(idx)

# calculate dual average: gradient
g = ((t-1)/t).*g + (1/(t)).*dfunc(At,yt,w)
Expand Down
7 changes: 4 additions & 3 deletions src/algorithms/reweighted_l2rda_alg.jl
Expand Up @@ -16,13 +16,14 @@ function reweighted_l2rda_alg(dfunc::Function, X, Y, λ::Float64, ɛ::Float64, v
if check
g = zeros(d,1)
w = rand(d,1)/100
A = [X'; ones(1,N)]
sub_arr = (I) -> [sub(X,I,:) ones(k,1)]'
else
g = spzeros(d,1)
total = length(X.nzval)
w = sprand(d,1,total/(N*d))/100
A = [X'; sparse(ones(1,N))]
fg = i -> -1./(ɛ.+ rw[i])
X = [X'; sparse(ones(1,N))]
sub_arr = (I) -> X[:,I]
end

if ~isempty(train_idx)
Expand All @@ -48,7 +49,7 @@ function reweighted_l2rda_alg(dfunc::Function, X, Y, λ::Float64, ɛ::Float64, v
w_prev = w

yt = Y[idx]
At = A[:,idx]
At = sub_arr(idx)

# calculate dual average: gradient
g = ((t-1)/t).*g + (1/(t)).*dfunc(At,yt,w)
Expand Down
7 changes: 4 additions & 3 deletions src/algorithms/sgd_alg.jl
Expand Up @@ -12,11 +12,12 @@ function sgd_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int, tole

if ~check
w = rand(d)
A = [X'; ones(1,N)]
sub_arr = (I) -> [sub(X,I,:) ones(k,1)]'
else
total = length(X.nzval)
w = sprand(d,1,total/(N*d))
A = [X'; sparse(ones(1,N))]
X = [X'; sparse(ones(1,N))]
sub_arr = (I) -> X[:,I]
end

if ~isempty(train_idx)
Expand All @@ -42,7 +43,7 @@ function sgd_alg(dfunc::Function, X, Y, λ::Float64, k::Int, max_iter::Int, tole
w_prev = w

yt = Y[idx]
At = A[:,idx]
At = sub_arr(idx)

# do a gradient descent step
η_t = 1/(λ*t)
Expand Down
2 changes: 1 addition & 1 deletion src/algorithms/stochastic_rk_means.jl
Expand Up @@ -41,7 +41,7 @@ function stochastic_rk_means{A <: Algorithm}(X, ppc::RK_MEANS{A}, alg_params::Ve
failed_mapping = false; t = 1; Y = ones(N)

while true
eval = pairwise(Euclidean(), X[train_idx,:]', w)
eval = pairwise(Euclidean(), getindex(X,train_idx,:)', w)
(x,y) = findn(eval .== minimum(eval,2))
mappings = zeros(length(train_idx))
mappings[x] = y
Expand Down

0 comments on commit 768c1a1

Please sign in to comment.