eliminate degenerate ordinal variables from hello_world.jl test
madeleineudell committed Jul 23, 2019
1 parent beb5e18 commit caef5c3
Showing 4 changed files with 22 additions and 14 deletions.
11 changes: 4 additions & 7 deletions src/losses.jl
@@ -465,14 +465,14 @@ mutable struct BvSLoss<:Loss
scale::Float64
domain::Domain
end
-BvSLoss(m::Integer, scale::Float64=1.0; domain=OrdinalDomain(1,m), bin_loss::Loss=LogisticLoss(scale)) = BvSLoss(m,bin_loss,scale,domain)
+function BvSLoss(m::Integer, scale::Float64=1.0; domain=OrdinalDomain(1,m), bin_loss::Loss=LogisticLoss(scale))
+    @assert(m >= 2, error("Number of levels of ordinal variable must be at least 2; got $m."))
+    BvSLoss(m,bin_loss,scale,domain)
+end
BvSLoss() = BvSLoss(10) # for copying correctly
embedding_dim(l::BvSLoss) = l.max-1
datalevels(l::BvSLoss) = 1:l.max # levels are encoded as the numbers 1:l.max

-# in Julia v0.4, argument u is a row vector (row slice of a matrix), which in julia is 2d
-# function evaluate(l::BvSLoss, u::Array{Float64,2}, a::Int)
-# this breaks compatibility with v0.4
function evaluate(l::BvSLoss, u::Array{Float64,1}, a::Int)
loss = 0
for j in 1:length(u)
@@ -481,9 +481,6 @@ function evaluate(l::BvSLoss, u::Array{Float64,1}, a::Int)
return l.scale*loss
end

-# in Julia v0.4, argument u is a row vector (row slice of a matrix), which in julia is 2d
-# function grad(l::BvSLoss, u::Array{Float64,2}, a::Int)
-# this breaks compatibility with v0.4
function grad(l::BvSLoss, u::Array{Float64,1}, a::Int)
g = zeros(length(u))
for j in 1:length(u)
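For context, a rough sketch of how the guarded constructor above behaves (assuming LowRankModels is loaded and exports BvSLoss and embedding_dim; the values are illustrative, not part of the commit):

using LowRankModels

l = BvSLoss(3)      # ordinal variable with 3 levels: passes the new check
embedding_dim(l)    # 2, i.e. max - 1 binary comparisons
# BvSLoss(1)        # would now fail the @assert: a 1-level ordinal variable is degenerate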
2 changes: 1 addition & 1 deletion src/sample.jl
@@ -133,7 +133,7 @@ function sample(glrm::GLRM, do_sample::Function=all_entries, is_dense::Bool=true
# make sure we don't mutate the type of the array A
# even if all data for some real loss take integer values
for j=1:n
-if isa(domains[j], RealDomain) && isa(glrm.A[j], DataArray{Int64,1})
+if isa(domains[j], RealDomain) && isa(glrm.A[:,j], Array{Union{Missing, Int},1})
domains[j] = OrdinalDomain(minimum(dropmissing(glrm.A[j])), maximum(dropmissing(glrm.A[j])))
end
end
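A rough illustration of the column check this hunk switches to, using throwaway values that are not part of the commit:

col_int  = Union{Missing, Int}[1, 2, missing, 4]
col_real = [1.5, 2.0, 3.5]

isa(col_int, Array{Union{Missing, Int}, 1})   # true: integer-valued column, may be promoted to an OrdinalDomain
isa(col_real, Array{Union{Missing, Int}, 1})  # false: genuinely real-valued column keeps its RealDomain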
15 changes: 9 additions & 6 deletions test/hello_world.jl
@@ -1,4 +1,5 @@
using LowRankModels, DataFrames, Random, SparseArrays
+Random.seed!(0)

# loss types to test
real_loss_types = [QuadLoss, HuberLoss]
@@ -27,6 +28,8 @@ end

# regularizers to test
regularizers = [QuadReg(), OneReg(5), NonNegConstraint(), KSparseConstraint(2)]
+# add more regularizers = more rows so the data isn't degenerate
+regularizers = cat(regularizers, fill(QuadReg(), 10), dims=1)

m,n = length(regularizers), length(losses)

@@ -39,7 +42,7 @@ A_cat = rand(1:3, m, length(categorical_losses))
A = Any[A_real A_bool A_ord A_cat]

glrm = GLRM(A, losses, regularizers, QuadReg(), 2)
-fit!(glrm)
+fit!(glrm, verbose=false)
println("successfully fit matrix")

### now fit data frame
@@ -48,18 +51,18 @@ df = NaNs_to_Missing!(DataFrame(Array(0 ./ A_sparse + A_sparse)))
# explicitly encoding missing
obs = observations(df)
glrm = GLRM(df, QuadLoss(), QuadReg(), QuadReg(), 2, obs=obs)
-fit!(glrm)
+fit!(glrm, verbose=false)

# implicitly encoding missings from dataframe - this functionality has not been implemented for dataframes
# glrm = GLRM(df, QuadLoss(), QuadReg(), QuadReg(), 2)
-# fit!(glrm)
+# fit!(glrm, verbose=false)

# without specifying losses directly
glrm = GLRM(DataFrame(A), 3, data_types)
-fit!(glrm)
+fit!(glrm, verbose=false)
println("successfully fit dataframe")

-# imputation and sampling
+### imputation and sampling
impute(glrm)
println("successfully imputed entries")
sample(glrm)
@@ -71,5 +74,5 @@ println("successfully sampled from model")
m, n = 10, 10
sparseA = sprandn(m, n, .5)
glrm = GLRM(A, QuadLoss(), QuadReg(), QuadReg(), 5)
-fit!(glrm)
+fit!(glrm, verbose=false)
println("successfully fit sparse GLRM")
8 changes: 8 additions & 0 deletions todo.md
@@ -26,3 +26,11 @@
* sample doesn't work
* lots of bugs in fit_dataframe_w_type_imputation; deprecated for now. (also it's an odd thing to do.)
* imputation doesn't return correct type (for dataframes)

+# How to register/publish a new version of the package
+
+1. update version number in Project.toml
+2. navigate to commit that you want tagged on github
+3. comment @Registrator register
+4. monitor resulting PR on the general registry to see if any bugs are found
+5. when PR is accepted, use Tagger to make github release
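As an illustration of step 1, the bump is a one-line edit to the version field in Project.toml (the version numbers below are hypothetical, not taken from this repository):

name = "LowRankModels"
version = "1.0.1"   # previously "1.0.0"; bump according to semver before tagging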
