In [18]:
using DataFrames, Plots, CSV, LowRankModels
# load data: both files are parsed with the same column types
# (five Int columns, four Float64 columns, then one Int column)
column_types = [Int, Int, Int, Int, Int, Float64, Float64, Float64, Float64, Int]
train = CSV.read("train_input.csv"; types=column_types);
test = CSV.read("test_input.csv"; types=column_types);

In [7]:
import LowRankModels: evaluate, grad
# Linear-model methods: the loss is evaluated on the predictions X*w, and the
# result of `grad` on those predictions is multiplied by X'.

# w is a coefficient vector
function evaluate(loss::Loss, X::Matrix{Float64}, w::Vector{Float64}, y)
    return evaluate(loss, X*w, y)
end
function grad(loss::Loss, X::Matrix{Float64}, w::Vector{Float64}, y)
    return X'*grad(loss, X*w, y)
end

# w is a coefficient matrix
function evaluate(loss::Loss, X::Matrix{Float64}, w::Matrix{Float64}, y)
    return evaluate(loss, X*w, y)
end
function grad(loss::Loss, X::Matrix{Float64}, w::Matrix{Float64}, y)
    return X'*grad(loss, X*w, y)
end

# Whether each loss type is treated as differentiable.
is_differentiable(::Union{QuadLoss, HuberLoss, PoissonLoss, LogisticLoss,
                          OrdisticLoss, MultinomialOrdinalLoss,
                          MultinomialLoss, PeriodicLoss}) = true
is_differentiable(::Union{L1Loss, QuantileLoss, WeightedHingeLoss,
                          OrdinalHingeLoss}) = false
# These two defer to the loss stored in their `bin_loss` field.
is_differentiable(l::Union{BvSLoss, OvALoss}) = is_differentiable(l.bin_loss)

# Entry point: forwards every positional and keyword argument to
# `proxgrad_linesearch` and returns its result.
# Choosing `proxgrad_dec` when `is_differentiable(loss)` is false is currently
# disabled; the line-search variant is used for every loss.
proxgrad(loss::Loss, args...; kwargs...) = proxgrad_linesearch(loss, args...; kwargs...)

# Proximal gradient with a backtracking step size.
#
# Every iteration takes a prox-gradient step from `w`. The step is kept only if
# it does not increase the objective (loss + regularizer); otherwise `w` is left
# unchanged and the step size is halved for the next attempt.
#
# Keyword arguments:
#   maxiters - number of attempted steps (accepted or rejected)
#   stepsize - initial step size
#   w        - starting point; overwritten in place and returned
#   ch       - convergence history; gets the initial objective plus one entry
#              per accepted step, with the time elapsed since the previous entry
function proxgrad_linesearch(loss::Loss, reg::Regularizer, X::Array{Float64,2}, y;
                  maxiters = 100,
                  stepsize = 1,
                  w = (embedding_dim(loss)==1 ? zeros(size(X,2)) : zeros(size(X,2), embedding_dim(loss))),
                  ch = ConvergenceHistory("proxgrad"))
    update_ch!(ch, 0, evaluate(loss, X, w, y) + evaluate(reg, w))
    # work in floating point from the start so halving never changes the type
    step = float(stepsize)
    t = time()
    for i=1:maxiters
        # gradient
        g = grad(loss, X, w, y)
        # prox gradient step
        neww = prox(reg, w - step*g, step)
        # objective at the candidate point
        curobj = evaluate(loss, X, neww, y) + evaluate(reg, neww)
        # `<=` is false when curobj is NaN, so a NaN objective is rejected like
        # any other failed step instead of being accepted
        if curobj <= ch.objective[end]
            copy!(w, neww)
            t, told = time(), t
            update_ch!(ch, t - told, curobj)
        else
            step *= .5
        end
    end
    return w
end

# Proximal gradient with a decreasing step size (stepsize/i at iteration i).
#
# The iterate always moves; the best point seen so far is tracked separately
# and returned.
#
# Keyword arguments:
#   maxiters - number of iterations
#   stepsize - base step size, divided by the iteration number
#   w        - starting point (not modified; the loop rebinds a local)
#   ch       - convergence history; gets the initial objective plus one entry
#              per strict improvement, with the time elapsed since the previous
#              entry (same convention as `proxgrad_linesearch`)
#   verbose  - print a notice at start and on every improvement
function proxgrad_dec(loss::Loss, reg::Regularizer, X::Array{Float64,2}, y;
                  maxiters = 100,
                  stepsize = 1,
                  w = (embedding_dim(loss)==1 ? zeros(size(X,2)) : zeros(size(X,2), embedding_dim(loss))),
                  ch = ConvergenceHistory("proxgrad"),
                  verbose = true)
    wbest = copy(w)
    update_ch!(ch, 0, evaluate(loss, X, w, y) + evaluate(reg, w))
    t = time()
    if verbose
      println("using decreasing stepsize for nondifferentiable loss")
    end
    for i=1:maxiters
        # gradient
        g = grad(loss, X, w, y)
        # prox gradient step
        w = prox(reg, w - stepsize/i*g, stepsize/i)
        # record objective value
        obj = evaluate(loss, X, w, y) + evaluate(reg, w)
        if obj < ch.objective[end]
          if verbose
            println("found a better obj $obj")
          end
          copy!(wbest, w)
          # pass the time since the last recorded entry, not since the start
          t, told = time(), t
          update_ch!(ch, t - told, obj)
        end
    end
    return wbest
end

# Proximal gradient with a constant step size.
#
# The iterate always moves; the best point seen so far is tracked separately
# and returned.
#
# Keyword arguments:
#   maxiters - number of iterations
#   stepsize - step size used at every iteration
#   w        - starting point (not modified; the loop rebinds a local)
#   ch       - convergence history; gets the initial objective plus one entry
#              per strict improvement, with the time elapsed since the previous
#              entry (same convention as `proxgrad_linesearch`)
function proxgrad_const(loss::Loss, reg::Regularizer, X::Array{Float64,2}, y;
                  maxiters = 100,
                  stepsize = 1,
                  w = (embedding_dim(loss)==1 ? zeros(size(X,2)) : zeros(size(X,2), embedding_dim(loss))),
                  ch = ConvergenceHistory("proxgrad"))
    wbest = copy(w)
    update_ch!(ch, 0, evaluate(loss, X, w, y) + evaluate(reg, w))
    t = time()
    for i=1:maxiters
        # gradient
        g = grad(loss, X, w, y)
        # prox gradient step
        w = prox(reg, w - stepsize*g, stepsize)
        # record objective value
        obj = evaluate(loss, X, w, y) + evaluate(reg, w)
        if obj < ch.objective[end]
          copy!(wbest, w)
          # pass the time since the last recorded entry, not since the start
          t, told = time(), t
          update_ch!(ch, t - told, obj)
        end
    end
    return wbest
end

proxgrad_const (generic function with 1 method)

In [56]:
# Fraction of entries whose predicted sign differs from the label's sign.
# Each opposite-sign pair contributes |(+1) - (-1)| / 2 = 1 to the sum.
function errorRate(prediction, labels)
    mismatch = abs.(sign.(prediction) - sign.(labels)) / 2.0
    return sum(mismatch) / size(labels, 1)
end
# Sum of squared differences between the raw predictions and the sign of each
# label. Uses the `labels` argument (previously the global `testy` was read
# instead, so the argument was silently ignored).
squaredError(prediction, labels) = sum(abs2, prediction - sign.(labels))

squaredError (generic function with 1 method)

# least squares regression

In [34]:
# Unwrap one cell with `get`; broadcast over an array as `getElements.(A)`.
# NOTE(review): presumably the cells are Nullable-wrapped values - confirm.
getElements(a) = get(a)
# features: columns 6 through the second-to-last; label: the last column
X = convert(Array, train[1:size(train, 1), 6:(size(train, 2) - 1)]);
y = convert(Array, train[1:size(train, 1), end:end]);
X = getElements.(X)
y = getElements.(y)
# the row count has to be read after X exists (it used to be taken before X
# was assigned, which only worked with state left over from an earlier run)
n = size(X,1)
# append a column of ones so the last coefficient acts as an offset
X = [X ones(n)]
# flatten the n-by-1 label matrix into a vector
y = y[:]
size(y)

(160000,)

In [57]:
# least squares regression: quadratic loss, no regularizer
w_quad = proxgrad(QuadLoss(), ZeroReg(), X, y; maxiters=600)

5-element Array{Float64,1}:
  0.0143044 
 -0.00726158
  0.0207852 
 -0.0206557 
  0.00134724

In [58]:
# test features (columns 6 through the second-to-last) and labels (last column)
testX = convert(Array, test[1:size(test, 1), 6:(size(test, 2) - 1)]);
testy = convert(Array, test[1:size(test, 1), end:end]);
# unwrap the cells and append the column of ones
testX = hcat(getElements.(testX), ones(size(testX, 1)))
testy = getElements.(testy)
# predicted class = sign of the linear score
test_pred = sign.(testX * w_quad);

In [59]:
# score the test set once and print both metrics
let scores = testX*w_quad
    println(errorRate(scores, testy))
    println(squaredError(scores, testy))
end

0.30919220055710306
338.9134471630731


## ridge

In [72]:
# ridge: quadratic loss with a quadratic regularizer
w_ridge = proxgrad(QuadLoss(), QuadReg(), X, y; maxiters=600)

Stacktrace:
 [1] [1mdepwarn[22m[22m[1m([22m[22m::String, ::Symbol[1m)[22m[22m at [1m./deprecated.jl:70[22m[22m
 [2] [1msumabs2[22m[22m[1m([22m[22m::Array{Float64,1}[1m)[22m[22m at [1m./deprecated.jl:57[22m[22m
 [3] [1mevaluate[22m[22m at [1m/////////mnt/juliabox/.julia/v0.6/LowRankModels/src/regularizers.jl:57[22m[22m [inlined]
 [4] [1m#proxgrad_linesearch#6[22m[22m[1m([22m[22m::Int64, ::Int64, ::Array{Float64,1}, ::LowRankModels.ConvergenceHistory, ::Function, ::LowRankModels.QuadLoss, ::LowRankModels.QuadReg, ::Array{Float64,2}, ::Array{Int64,1}[1m)[22m[22m at [1m./In[7]:36[22m[22m
 [5] [1m(::#kw##proxgrad_linesearch)[22m[22m[1m([22m[22m::Array{Any,1}, ::#proxgrad_linesearch, ::LowRankModels.QuadLoss, ::LowRankModels.QuadReg, ::Array{Float64,2}, ::Array{Int64,1}[1m)[22m[22m at [1m./<missing>:0[22m[22m
 [6] [1m#proxgrad#5[22m[22m[1m([22m[22m::Array{Any,1}, ::Function, ::LowRankModels.QuadLoss, ::LowRankModels.QuadReg, ::Varar

5-element Array{Float64,1}:
  0.0143044 
 -0.00726158
  0.0207852 
 -0.0206557 
  0.00134724

In [73]:
test_pred = sign.(testX*w_ridge)
# score the test set once and print both metrics
let scores = testX*w_ridge
    println(errorRate(scores, testy))
    println(squaredError(scores, testy))
end

0.30919220055710306
338.91344883383545


## lasso

In [74]:
# lasso: quadratic loss with the OneReg regularizer
w_lasso = proxgrad(QuadLoss(), OneReg(), X, y; maxiters=600)

Stacktrace:
 [1] [1mdepwarn[22m[22m[1m([22m[22m::String, ::Symbol[1m)[22m[22m at [1m./deprecated.jl:70[22m[22m
 [2] [1msumabs[22m[22m[1m([22m[22m::Array{Float64,1}[1m)[22m[22m at [1m./deprecated.jl:57[22m[22m
 [3] [1mevaluate[22m[22m at [1m/////////mnt/juliabox/.julia/v0.6/LowRankModels/src/regularizers.jl:87[22m[22m [inlined]
 [4] [1m#proxgrad_linesearch#6[22m[22m[1m([22m[22m::Int64, ::Int64, ::Array{Float64,1}, ::LowRankModels.ConvergenceHistory, ::Function, ::LowRankModels.QuadLoss, ::LowRankModels.OneReg, ::Array{Float64,2}, ::Array{Int64,1}[1m)[22m[22m at [1m./In[7]:36[22m[22m
 [5] [1m(::#kw##proxgrad_linesearch)[22m[22m[1m([22m[22m::Array{Any,1}, ::#proxgrad_linesearch, ::LowRankModels.QuadLoss, ::LowRankModels.OneReg, ::Array{Float64,2}, ::Array{Int64,1}[1m)[22m[22m at [1m./<missing>:0[22m[22m
 [6] [1m#proxgrad#5[22m[22m[1m([22m[22m::Array{Any,1}, ::Function, ::LowRankModels.QuadLoss, ::LowRankModels.OneReg, ::Vararg{An

5-element Array{Float64,1}:
  0.0143042 
 -0.00726146
  0.0207851 
 -0.0206556 
  0.00134711

In [75]:
test_pred = sign.(testX*w_lasso)
# score the test set once and print both metrics
let scores = testX*w_lasso
    println(errorRate(scores, testy))
    println(squaredError(scores, testy))
end

0.30919220055710306
338.9135139125289


# l1 regression

In [76]:
# l1 regression: L1 loss, no regularizer
w_l1 = proxgrad(L1Loss(), ZeroReg(), X, y; maxiters=700)

5-element Array{Float64,1}:
  0.0176902 
 -0.00838146
  0.032531  
 -0.0296652 
  0.00091361

In [77]:
test_pred = sign.(testX*w_l1)
# score the test set once and print both metrics
let scores = testX*w_l1
    println(errorRate(scores, testy))
    println(squaredError(scores, testy))
end

0.49025069637883006


LoadError: [91mMethodError: no method matching *(::Array{Float64,2}, ::LowRankModels.#l1)[0m
Closest candidates are:
  *(::Any, ::Any, [91m::Any[39m, [91m::Any...[39m) at operators.jl:424
  *(::Union{Base.ReshapedArray{T<:Union{Complex{Float32}, Complex{Float64}, Float32, Float64},2,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:DenseArray, DenseArray{T<:Union{Complex{Float32}, Complex{Float64}, Float32, Float64},2}, SubArray{T<:Union{Complex{Float32}, Complex{Float64}, Float32, Float64},2,A,I,L} where L} where I<:Tuple{Vararg{Union{Base.AbstractCartesianIndex, Int64, Range{Int64}},N} where N} where A<:Union{Base.ReshapedArray{T,N,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:DenseArray where N where T, DenseArray}, [91m::Union{Base.ReshapedArray{S,1,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:DenseArray, DenseArray{S,1}, SubArray{S,1,A,I,L} where L} where I<:Tuple{Vararg{Union{Base.AbstractCartesianIndex, Int64, Range{Int64}},N} where N} where A<:Union{Base.ReshapedArray{T,N,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:DenseArray where N where T, DenseArray}[39m) where {T<:Union{Complex{Float32}, Complex{Float64}, Float32, Float64}, S} at linalg/matmul.jl:74
  *(::Union{Base.ReshapedArray{T,2,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:DenseArray, DenseArray{T,2}, SubArray{T,2,A,I,L} where L} where I<:Tuple{Vararg{Union{Base.AbstractCartesianIndex, Int64, Range{Int64}},N} where N} where A<:Union{Base.ReshapedArray{T,N,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:DenseArray where N where T, DenseArray} where T, [91m::Union{Base.LinAlg.QRCompactWYQ, Base.LinAlg.QRPackedQ}[39m) at linalg/qr.jl:627
  ...[39m

# huber regression

In [78]:
# huber regression: Huber loss, no regularizer
w_huber = proxgrad(HuberLoss(), ZeroReg(), X, y; maxiters=700)

5-element Array{Float64,1}:
  0.000216319
 -0.000119129
  0.00244823 
  0.00153871 
  4.19456e-6 

In [79]:
test_pred = sign.(testX*w_huber)
# score the test set once and print both metrics
let scores = testX*w_huber
    println(errorRate(scores, testy))
    println(squaredError(scores, testy))
end

0.5069637883008357
418.9386084894679


# Logistic regression

In [80]:
# logistic regression with a quadratic regularizer
w_logistic = proxgrad(LogisticLoss(), QuadReg(), X, y; maxiters=600)

Stacktrace:
 [1] [1mdepwarn[22m[22m[1m([22m[22m::String, ::Symbol[1m)[22m[22m at [1m./deprecated.jl:70[22m[22m
 [2] [1msumabs2[22m[22m[1m([22m[22m::Array{Float64,1}[1m)[22m[22m at [1m./deprecated.jl:57[22m[22m
 [3] [1mevaluate[22m[22m at [1m/////////mnt/juliabox/.julia/v0.6/LowRankModels/src/regularizers.jl:57[22m[22m [inlined]
 [4] [1m#proxgrad_linesearch#6[22m[22m[1m([22m[22m::Int64, ::Int64, ::Array{Float64,1}, ::LowRankModels.ConvergenceHistory, ::Function, ::LowRankModels.LogisticLoss, ::LowRankModels.QuadReg, ::Array{Float64,2}, ::Array{Int64,1}[1m)[22m[22m at [1m./In[7]:36[22m[22m
 [5] [1m(::#kw##proxgrad_linesearch)[22m[22m[1m([22m[22m::Array{Any,1}, ::#proxgrad_linesearch, ::LowRankModels.LogisticLoss, ::LowRankModels.QuadReg, ::Array{Float64,2}, ::Array{Int64,1}[1m)[22m[22m at [1m./<missing>:0[22m[22m
 [6] [1m#proxgrad#5[22m[22m[1m([22m[22m::Array{Any,1}, ::Function, ::LowRankModels.LogisticLoss, ::LowRankModels.Quad

5-element Array{Float64,1}:
  0.0288396 
 -0.0147028 
  0.0422295 
 -0.0419693 
  0.00270916

In [81]:
test_pred = sign.(testX*w_logistic)
# score the test set once and print both metrics
let scores = testX*w_logistic
    println(errorRate(scores, testy))
    println(squaredError(scores, testy))
end

0.30919220055710306
329.0205457559514


In [82]:
# logistic regression without a regularizer (rebinds w_logistic)
w_logistic = proxgrad(LogisticLoss(), ZeroReg(), X, y; maxiters=600)

5-element Array{Float64,1}:
  0.0288396 
 -0.0147028 
  0.0422296 
 -0.0419694 
  0.00270916

In [83]:
test_pred = sign.(testX*w_logistic)
# score the test set once and print both metrics
let scores = testX*w_logistic
    println(errorRate(scores, testy))
    println(squaredError(scores, testy))
end

0.30919220055710306
329.0205374123414
