From d88d103977c407246e451346f4b106fba7829c10 Mon Sep 17 00:00:00 2001 From: Vilen Jumutc Date: Fri, 18 Sep 2015 15:04:08 +0200 Subject: [PATCH] small refactoring --- src/SALSA.jl | 33 ++++++------------------------ src/SALSAModel.jl | 28 +------------------------- src/kernels/kernels.jl | 6 +----- src/loss_derivative.jl | 9 +-------- src/print.jl | 43 ++++++++++++++++++++++++++++++++++++++++ src/salsa_main.jl | 24 ++++++++++++++++++++++ src/support/constants.jl | 9 +-------- 7 files changed, 77 insertions(+), 75 deletions(-) create mode 100644 src/print.jl diff --git a/src/SALSA.jl b/src/SALSA.jl index dc49d9d..6d6d362 100644 --- a/src/SALSA.jl +++ b/src/SALSA.jl @@ -44,6 +44,7 @@ export salsa, DelimitedFile, # global optimization CSA, DS, GlobalOpt, + csa, ds, # cross-validation criterion MISCLASS, AUC, MSE, @@ -53,14 +54,14 @@ export salsa, LinearKernel, PolynomialKernel, # core algorithmic schemas + stochastic_rk_means, adaptive_l1rda_alg, reweighted_l1rda_alg, reweighted_l2rda_alg, pegasos_alg, dropout_alg, l1rda_alg, - sgd_alg, - stochastic_rk_means + sgd_alg using MLBase, Distributions, Compat, Distances, Clustering @@ -106,29 +107,7 @@ include("tune_algorithm.jl") include("tune_algorithm_AFEm.jl") # main runnable source include("salsa_main.jl") +# fine printing out +include("print.jl") -# extensive set of multiplicated aliases for different algorithms and models /// dense matrices -salsa{L <: Loss, A <: Algorithm, M <: Mode, N1 <: Number, N2 <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::Array{N1,2}, Y::Array{N2,1}, Xtest::Array{N1,2}) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) -salsa{L <: Loss, A <: Algorithm, M <: Mode, N1 <: Number, N2 <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::Array{N1,2}, Y::Array{N2,2}, Xtest::Array{N1,2}) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) -salsa{A <: Algorithm, N1 <: Number, N2 <: Number}(alg::Type{A}, X::Array{N1,2}, Y::Array{N2,1}, Xtest::Array{N1,2}) = salsa(X,Y,SALSAModel(LINEAR,alg(),HINGE),Xtest) -salsa{A <: Algorithm, N1 <: Number, N2 <: Number}(alg::Type{A}, X::Array{N1,2}, Y::Array{N2,2}, Xtest::Array{N1,2}) = salsa(X,Y,SALSAModel(LINEAR,alg(),HINGE),Xtest) -salsa{N1 <: Number, N2 <: Number}(X::Array{N1,2}, Y::Array{N2,1}, Xtest::Array{N1,2}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) -salsa{N1 <: Number, N2 <: Number}(X::Array{N1,2}, Y::Array{N2,2}, Xtest::Array{N1,2}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) -salsa{N1 <: Number, N2 <: Number}(X::Array{N1,2}, Y::Array{N2,1}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Array{Int64}(0,0)) -salsa{N1 <: Number, N2 <: Number}(X::Array{N1,2}, Y::Array{N2,2}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Array{Int64}(0,0)) -# extensive set of multiplicated aliases for different algorithms and models /// sparse matrices -salsa{L <: Loss, A <: Algorithm, M <: Mode, N <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::SparseMatrixCSC, Y::Array{N,1}, Xtest::SparseMatrixCSC) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) -salsa{L <: Loss, A <: Algorithm, M <: Mode, N <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::SparseMatrixCSC, Y::Array{N,2}, Xtest::SparseMatrixCSC) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) -salsa{A <: Algorithm, N <: Number}(alg::Type{A}, X::SparseMatrixCSC, Y::Array{N,1}, Xtest::SparseMatrixCSC) = salsa(X,Y,SALSAModel(LINEAR,alg(),HINGE),Xtest) -salsa{A <: Algorithm, N <: Number}(alg::Type{A}, X::SparseMatrixCSC, Y::Array{N,2}, Xtest::SparseMatrixCSC) = salsa(X,Y,SALSAModel(LINEAR,alg(),HINGE),Xtest) -salsa{N <: Number}(X::SparseMatrixCSC, Y::Array{N,1}, Xtest::SparseMatrixCSC) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) -salsa{N <: Number}(X::SparseMatrixCSC, Y::Array{N,2}, Xtest::SparseMatrixCSC) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) -salsa{N <: Number}(X::SparseMatrixCSC, Y::Array{N,1}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,sparse([])) -salsa{N <: Number}(X::SparseMatrixCSC, Y::Array{N,2}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,sparse([])) -# extensive set of multiplicated aliases for different algorithms and models /// DelimitedFile -salsa{L <: Loss, A <: Algorithm, M <: Mode, N <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::DelimitedFile, Y::Array{N,1}, Xtest::DelimitedFile) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) -salsa{L <: Loss, A <: Algorithm, M <: Mode, N <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::DelimitedFile, Y::Array{N,2}, Xtest::DelimitedFile) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) -salsa{N <: Number}(X::DelimitedFile, Y::Array{N,1}, Xtest::DelimitedFile) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) -salsa{N <: Number}(X::DelimitedFile, Y::Array{N,2}, Xtest::DelimitedFile) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) - -end +end \ No newline at end of file diff --git a/src/SALSAModel.jl b/src/SALSAModel.jl index e6a58a2..ed0c191 100644 --- a/src/SALSAModel.jl +++ b/src/SALSAModel.jl @@ -112,30 +112,4 @@ SALSAModel() = SALSAModel(LINEAR,PEGASOS(),HINGE) SALSAModel{K <: Kernel}(kernel::Type{K}, model) = SALSAModel(model.mode,model.algorithm,model.loss_function,kernel=kernel,process_labels=model.process_labels,validation_criterion=model.validation_criterion) SALSAModel{A <: Algorithm}(algorithm::A, model) = SALSAModel(model.mode,algorithm,model.loss_function,kernel=model.kernel,process_labels=model.process_labels,validation_criterion=model.validation_criterion) SALSAModel{L <: Loss}(loss_function::Type{L}, model) = SALSAModel(model.mode,model.algorithm,loss_function,kernel=model.kernel,process_labels=model.process_labels,validation_criterion=model.validation_criterion) -SALSAModel{M <: Mode}(mode::Type{M}, model) = SALSAModel(mode,model.algorithm,model.loss_function,kernel=model.kernel,process_labels=model.process_labels,validation_criterion=model.validation_criterion) - -check_printable(value) = typeof(value) <: Array || typeof(value) <: Mode -print_value(value) = check_printable(value) ? summary(value) : value - -show(io::IO, t::PEGASOS) = @printf io "%s (%s)" typeof(t) "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM" -show(io::IO, t::L1RDA) = @printf io "%s (%s)" typeof(t) "l1-Regularized Dual Averaging" -show(io::IO, t::R_L1RDA) = @printf io "%s (%s)" typeof(t) "Reweighted l1-Regularized Dual Averaging" -show(io::IO, t::R_L2RDA) = @printf io "%s (%s)" typeof(t) "Reweighted l2-Regularized Dual Averaging" -show(io::IO, t::ADA_L1RDA) = @printf io "%s (%s)" typeof(t) "Adaptive l1-Regularized Dual Averaging" -show(io::IO, t::DROP_OUT) = @printf io "%s (%s)" typeof(t) "Dropout Pegasos (experimental)" -show(io::IO, t::SIMPLE_SGD) = @printf io "%s (%s)" typeof(t) "Stochastic Gradient Descent" - -function show(io::IO, model::SALSAModel) - print_with_color(:blue, io, "SALSA model:\n") - for field in fieldnames(model) - value = getfield(model,field) - field == :output ? println() : @printf io "\t%s : %s\n" field print_value(value) - end - print_with_color(:blue, io, "SALSA model.output:\n") - for field in fieldnames(model.output) - if isdefined(model.output,field) - value = getfield(model.output,field) - @printf io "\t%s : %s\n" field print_value(value) - end - end -end \ No newline at end of file +SALSAModel{M <: Mode}(mode::Type{M}, model) = SALSAModel(mode,model.algorithm,model.loss_function,kernel=model.kernel,process_labels=model.process_labels,validation_criterion=model.validation_criterion) \ No newline at end of file diff --git a/src/kernels/kernels.jl b/src/kernels/kernels.jl index d10eb43..8865e6b 100644 --- a/src/kernels/kernels.jl +++ b/src/kernels/kernels.jl @@ -19,8 +19,4 @@ include("polynomial_kernel.jl") include("linear_kernel.jl") kernel_from_parameters{T<:Kernel}(k::Type{T}, parameters) = k(parameters...) -kernel_from_data_model{T<:Kernel}(k::Type{T}, X) = isempty(fieldnames(k)) ? k() : k(rand(length(fieldnames(k)))...) - -show(io::IO, t::Type{RBFKernel}) = @printf io "SALSA.RBFKernel (%s)" "Radial Basis Function kernel, i.e. k(x,y) = exp(-||x - y||^2/(2σ^2))" -show(io::IO, t::Type{PolynomialKernel}) = @printf io "SALSA.PolynomialKernel (%s)" "Polynomial kernel, i.e. k(x,y) = ( + τ)^d" -show(io::IO, t::Type{LinearKernel}) = @printf io "SALSA.LinearKernel (%s)" "Linear kernel, i.e. k(x,y) = " \ No newline at end of file +kernel_from_data_model{T<:Kernel}(k::Type{T}, X) = isempty(fieldnames(k)) ? k() : k(rand(length(fieldnames(k)))...) \ No newline at end of file diff --git a/src/loss_derivative.jl b/src/loss_derivative.jl index 07fea09..b0018da 100644 --- a/src/loss_derivative.jl +++ b/src/loss_derivative.jl @@ -92,11 +92,4 @@ loss_derivative(::Type{SQUARED_HINGE}) = squared_hinge_loss_derivative loss_derivative(::Type{MODIFIED_HUBER}) = modified_huber_loss_derivative loss_derivative(::Type{PINBALL},tau::Float64) = (At,yt,w) -> pinball_loss_derivative(At,yt,w,tau) loss_derivative{A <: Algorithm, M <: Euclidean}(alg::RK_MEANS{A,M}) = (At::Matrix,yt,w) -> reduce((d0,i) -> d0 + (w - At[:,i]), zeros(size(At,1),1), 1:1:size(At,2)) -loss_derivative{A <: Algorithm, M <: CosineDist}(alg::RK_MEANS{A,M}) = (At::Matrix,yt,w) -> begin idx = find(evaluate(At,yt,w) .<= 0); -sum(At[:,idx],2) end - -show(io::IO, t::Type{HINGE}) = @printf io "SALSA.HINGE (%s)" "Hinge Loss, i.e. l(y,p) = max(0,1 - yp)" -show(io::IO, t::Type{LOGISTIC}) = @printf io "SALSA.LOGISTIC (%s)" "Logistic Loss, i.e. l(y,p) = log(1 + exp(-yp))" -show(io::IO, t::Type{LEAST_SQUARES}) = @printf io "SALSA.LEAST_SQUARES (%s)" "Squared Loss, i.e. l(y,p) = 1/2*(p - y)^2" -show(io::IO, t::Type{SQUARED_HINGE}) = @printf io "SALSA.SQUARED_HINGE (%s)" "Squared Hinge Loss, i.e. l(y,p) = max(0,1 - yp)^2" -show(io::IO, t::Type{PINBALL}) = @printf io "SALSA.PINBALL (%s)" "Pinball (quantile) Loss, i.e. l(y,p) = τI(yp>=1)yp + I(yp<1)(1 - yp)" -show(io::IO, t::Type{MODIFIED_HUBER}) = @printf io "SALSA.MODIFIED_HUBER (%s)" "Modified Huber Loss, i.e. l(y,p) = -4I(yp<-1)yp + I(yp>=-1)max(0,1 - yp)^2" \ No newline at end of file +loss_derivative{A <: Algorithm, M <: CosineDist}(alg::RK_MEANS{A,M}) = (At::Matrix,yt,w) -> begin idx = find(evaluate(At,yt,w) .<= 0); -sum(At[:,idx],2) end \ No newline at end of file diff --git a/src/print.jl b/src/print.jl new file mode 100644 index 0000000..23f0f4c --- /dev/null +++ b/src/print.jl @@ -0,0 +1,43 @@ +check_printable(value) = typeof(value) <: Array || typeof(value) <: Mode +print_value(value) = check_printable(value) ? summary(value) : value + +show(io::IO, t::PEGASOS) = @printf io "%s (%s)" typeof(t) "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM" +show(io::IO, t::L1RDA) = @printf io "%s (%s)" typeof(t) "l1-Regularized Dual Averaging" +show(io::IO, t::R_L1RDA) = @printf io "%s (%s)" typeof(t) "Reweighted l1-Regularized Dual Averaging" +show(io::IO, t::R_L2RDA) = @printf io "%s (%s)" typeof(t) "Reweighted l2-Regularized Dual Averaging" +show(io::IO, t::ADA_L1RDA) = @printf io "%s (%s)" typeof(t) "Adaptive l1-Regularized Dual Averaging" +show(io::IO, t::DROP_OUT) = @printf io "%s (%s)" typeof(t) "Dropout Pegasos (experimental)" +show(io::IO, t::SIMPLE_SGD) = @printf io "%s (%s)" typeof(t) "Stochastic Gradient Descent" + +show(io::IO, t::MSE) = @printf io "%s (%s)" typeof(t) "Mean Squared Error" +show(io::IO, t::MISCLASS) = @printf io "%s (%s)" typeof(t) "Misclassification Rate" +show(io::IO, t::SILHOUETTE) = @printf io "%s (%s)" typeof(t) "Silhouette Index" +show(io::IO, t::AUC) = @printf io "%s (%s with %d thresholds)" typeof(t) "Area Under ROC Curve" t.n_thresholds +show(io::IO, t::CSA) = @printf io "%s (%s)" typeof(t) "Coupled Simulated Annealing" +show(io::IO, t::DS) = @printf io "%s (%s)" typeof(t) "Directional Search" + +show(io::IO, t::Type{HINGE}) = @printf io "SALSA.HINGE (%s)" "Hinge Loss, i.e. l(y,p) = max(0,1 - yp)" +show(io::IO, t::Type{LOGISTIC}) = @printf io "SALSA.LOGISTIC (%s)" "Logistic Loss, i.e. l(y,p) = log(1 + exp(-yp))" +show(io::IO, t::Type{LEAST_SQUARES}) = @printf io "SALSA.LEAST_SQUARES (%s)" "Squared Loss, i.e. l(y,p) = 1/2*(p - y)^2" +show(io::IO, t::Type{SQUARED_HINGE}) = @printf io "SALSA.SQUARED_HINGE (%s)" "Squared Hinge Loss, i.e. l(y,p) = max(0,1 - yp)^2" +show(io::IO, t::Type{PINBALL}) = @printf io "SALSA.PINBALL (%s)" "Pinball (quantile) Loss, i.e. l(y,p) = τI(yp>=1)yp + I(yp<1)(1 - yp)" +show(io::IO, t::Type{MODIFIED_HUBER}) = @printf io "SALSA.MODIFIED_HUBER (%s)" "Modified Huber Loss, i.e. l(y,p) = -4I(yp<-1)yp + I(yp>=-1)max(0,1 - yp)^2" + +show(io::IO, t::Type{RBFKernel}) = @printf io "SALSA.RBFKernel (%s)" "Radial Basis Function kernel, i.e. k(x,y) = exp(-||x - y||^2/(2σ^2))" +show(io::IO, t::Type{PolynomialKernel}) = @printf io "SALSA.PolynomialKernel (%s)" "Polynomial kernel, i.e. k(x,y) = ( + τ)^d" +show(io::IO, t::Type{LinearKernel}) = @printf io "SALSA.LinearKernel (%s)" "Linear kernel, i.e. k(x,y) = " + +function show(io::IO, model::SALSAModel) + print_with_color(:blue, io, "SALSA model:\n") + for field in fieldnames(model) + value = getfield(model,field) + field == :output ? println() : @printf io "\t%s : %s\n" field print_value(value) + end + print_with_color(:blue, io, "SALSA model.output:\n") + for field in fieldnames(model.output) + if isdefined(model.output,field) + value = getfield(model.output,field) + @printf io "\t%s : %s\n" field print_value(value) + end + end +end \ No newline at end of file diff --git a/src/salsa_main.jl b/src/salsa_main.jl index 2a7b28d..15552c2 100644 --- a/src/salsa_main.jl +++ b/src/salsa_main.jl @@ -73,3 +73,27 @@ function salsa(X, Y, model::SALSAModel) run_with_params(features_train,Y,model,pars) end end + +# extensive set of multiplicated aliases for different algorithms and models /// dense matrices +salsa{L <: Loss, A <: Algorithm, M <: Mode, N1 <: Number, N2 <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::Array{N1,2}, Y::Array{N2,1}, Xtest::Array{N1,2}) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) +salsa{L <: Loss, A <: Algorithm, M <: Mode, N1 <: Number, N2 <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::Array{N1,2}, Y::Array{N2,2}, Xtest::Array{N1,2}) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) +salsa{A <: Algorithm, N1 <: Number, N2 <: Number}(alg::Type{A}, X::Array{N1,2}, Y::Array{N2,1}, Xtest::Array{N1,2}) = salsa(X,Y,SALSAModel(LINEAR,alg(),HINGE),Xtest) +salsa{A <: Algorithm, N1 <: Number, N2 <: Number}(alg::Type{A}, X::Array{N1,2}, Y::Array{N2,2}, Xtest::Array{N1,2}) = salsa(X,Y,SALSAModel(LINEAR,alg(),HINGE),Xtest) +salsa{N1 <: Number, N2 <: Number}(X::Array{N1,2}, Y::Array{N2,1}, Xtest::Array{N1,2}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) +salsa{N1 <: Number, N2 <: Number}(X::Array{N1,2}, Y::Array{N2,2}, Xtest::Array{N1,2}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) +salsa{N1 <: Number, N2 <: Number}(X::Array{N1,2}, Y::Array{N2,1}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Array{Int64}(0,0)) +salsa{N1 <: Number, N2 <: Number}(X::Array{N1,2}, Y::Array{N2,2}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Array{Int64}(0,0)) +# extensive set of multiplicated aliases for different algorithms and models /// sparse matrices +salsa{L <: Loss, A <: Algorithm, M <: Mode, N <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::SparseMatrixCSC, Y::Array{N,1}, Xtest::SparseMatrixCSC) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) +salsa{L <: Loss, A <: Algorithm, M <: Mode, N <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::SparseMatrixCSC, Y::Array{N,2}, Xtest::SparseMatrixCSC) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) +salsa{A <: Algorithm, N <: Number}(alg::Type{A}, X::SparseMatrixCSC, Y::Array{N,1}, Xtest::SparseMatrixCSC) = salsa(X,Y,SALSAModel(LINEAR,alg(),HINGE),Xtest) +salsa{A <: Algorithm, N <: Number}(alg::Type{A}, X::SparseMatrixCSC, Y::Array{N,2}, Xtest::SparseMatrixCSC) = salsa(X,Y,SALSAModel(LINEAR,alg(),HINGE),Xtest) +salsa{N <: Number}(X::SparseMatrixCSC, Y::Array{N,1}, Xtest::SparseMatrixCSC) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) +salsa{N <: Number}(X::SparseMatrixCSC, Y::Array{N,2}, Xtest::SparseMatrixCSC) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) +salsa{N <: Number}(X::SparseMatrixCSC, Y::Array{N,1}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,sparse([])) +salsa{N <: Number}(X::SparseMatrixCSC, Y::Array{N,2}) = salsa(LINEAR,PEGASOS,HINGE,X,Y,sparse([])) +# extensive set of multiplicated aliases for different algorithms and models /// DelimitedFile +salsa{L <: Loss, A <: Algorithm, M <: Mode, N <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::DelimitedFile, Y::Array{N,1}, Xtest::DelimitedFile) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) +salsa{L <: Loss, A <: Algorithm, M <: Mode, N <: Number}(mode::Type{M}, alg::Type{A}, loss::Type{L}, X::DelimitedFile, Y::Array{N,2}, Xtest::DelimitedFile) = salsa(X,Y,SALSAModel(mode,alg(),loss),Xtest) +salsa{N <: Number}(X::DelimitedFile, Y::Array{N,1}, Xtest::DelimitedFile) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) +salsa{N <: Number}(X::DelimitedFile, Y::Array{N,2}, Xtest::DelimitedFile) = salsa(LINEAR,PEGASOS,HINGE,X,Y,Xtest) \ No newline at end of file diff --git a/src/support/constants.jl b/src/support/constants.jl index fe0586b..56811ac 100644 --- a/src/support/constants.jl +++ b/src/support/constants.jl @@ -14,11 +14,4 @@ immutable DS <: GlobalOpt end DS() = DS(Array(Float64,0)) -AUC() = AUC(100) - -show(io::IO, t::MSE) = @printf io "%s (%s)" typeof(t) "Mean Squared Error" -show(io::IO, t::MISCLASS) = @printf io "%s (%s)" typeof(t) "Misclassification Rate" -show(io::IO, t::SILHOUETTE) = @printf io "%s (%s)" typeof(t) "Silhouette Index" -show(io::IO, t::AUC) = @printf io "%s (%s with %d thresholds)" typeof(t) "Area Under ROC Curve" t.n_thresholds -show(io::IO, t::CSA) = @printf io "%s (%s)" typeof(t) "Coupled Simulated Annealing" -show(io::IO, t::DS) = @printf io "%s (%s)" typeof(t) "Directional Search" \ No newline at end of file +AUC() = AUC(100) \ No newline at end of file