Port to Julia 0.7/1.0 (#28)

Use the new Name wrapper from NamedArrays 0.9.0 when indexing to avoid ambiguity with Integer names.
nalimilan · Sep 5, 2018 · b293d6a
1 parent 45067fc
commit b293d6a
Show file tree

Hide file tree

Showing 5 changed files with 56 additions and 49 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,14 +1,20 @@
 language: julia
+
 os:
-  - osx
   - linux
+  - osx
+
 julia:
-  - 0.6
+  - 0.7
+  - 1.0
   - nightly
+
 notifications:
   email: false
+
 script:
   - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
-  - julia -e 'Pkg.clone(pwd()); Pkg.build("FreqTables"); Pkg.test("FreqTables"; coverage=true)';
+  - julia -e 'using Pkg; Pkg.clone(pwd()); Pkg.build("FreqTables"); Pkg.test("FreqTables"; coverage=true)';
+
 after_success:
-  - julia -e 'cd(Pkg.dir("FreqTables")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
+  - julia -e 'using Pkg; cd(Pkg.dir("FreqTables")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 [![Build Status](https://travis-ci.org/nalimilan/FreqTables.jl.svg?branch=master)](https://travis-ci.org/nalimilan/FreqTables.jl)
 [![Coverage Status](https://coveralls.io/repos/nalimilan/FreqTables.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/nalimilan/FreqTables.jl?branch=master)
-[![FreqTables](http://pkg.julialang.org/badges/FreqTables_0.6.svg)](http://pkg.julialang.org/?pkg=FreqTables&ver=0.6)
+[![FreqTables](http://pkg.julialang.org/badges/FreqTables_1.0.svg)](http://pkg.julialang.org/?pkg=FreqTables&ver=1.0)
 
 This package allows computing one- or multi-way frequency tables (a.k.a. contingency or pivot tables) from
 any type of vector or array. It includes support for [`CategoricalArray`](https://github.com/JuliaData/CategoricalArrays.jl)

diff --git a/REQUIRE b/REQUIRE
@@ -1,4 +1,4 @@
-julia 0.6
-NamedArrays
+julia 0.7
+NamedArrays 0.9.1
 CategoricalArrays 0.3.0
 DataFrames 0.11.0
diff --git a/src/freqtable.jl b/src/freqtable.jl
@@ -1,8 +1,7 @@
 import Base.ht_keyindex
 
 # Cf. https://github.com/JuliaStats/StatsBase.jl/issues/135
-immutable UnitWeights <: AbstractVector{Int}
-end
+struct UnitWeights <: AbstractVector{Int} end
 Base.getindex(w::UnitWeights, ::Integer...) = 1
 Base.getindex(w::UnitWeights, ::AbstractVector) = w
 
@@ -15,11 +14,11 @@ Base.@pure vectypes(T) = Tuple{map(U -> Vector{U}, T.parameters)...}
 function _freqtable(x::Tuple,
                     skipmissing::Bool = false,
                     weights::AbstractVector{<:Real} = UnitWeights(),
-                    subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing)
+                    subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing)
     n = length(x)
     n == 0 && throw(ArgumentError("at least one argument must be provided"))
 
-    if !isa(subset, Void)
+    if !isa(subset, Nothing)
         x = map(y -> y[subset], x)
         weights = weights[subset]
     end
@@ -50,12 +49,12 @@ function _freqtable(x::Tuple,
     end
 
     if skipmissing
-        filter!((k, v) -> !any(ismissing, k), d)
+        filter!(p -> !any(ismissing, p[1]), d)
     end
 
     keyvec = collect(keys(d))
 
-    dimnames = Vector{Vector}(n)
+    dimnames = Vector{Vector}(undef, n)
     for i in 1:n
         s = Set{vtypes.parameters[i]}()
         for j in 1:length(keyvec)
@@ -76,7 +75,7 @@ function _freqtable(x::Tuple,
     na = NamedArray(a, tuple(dimnames...)::vectypes(vtypes), ntuple(i -> "Dim$i", n))
 
     for (k, v) in d
-        na[k...] = v
+        na[Name.(k)...] = v
     end
 
     na
@@ -85,23 +84,25 @@ end
 freqtable(x::AbstractVector...;
           skipmissing::Bool = false,
           weights::AbstractVector{<:Real} = UnitWeights(),
-          subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
+          subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
     _freqtable(x, skipmissing, weights, subset)
 
 # Internal function needed for now so that n is inferred
 function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool = false,
                     weights::AbstractVector{<:Real} = UnitWeights(),
-                    subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n
+                    subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n
     n == 0 && throw(ArgumentError("at least one argument must be provided"))
 
-    if !isa(subset, Void)
+    if !isa(subset, Nothing)
         x = map(y -> y[subset], x)
         weights = weights[subset]
     end
 
     len = map(length, x)
     miss = map(v -> eltype(v) >: Missing, x)
-    lev = map(v -> eltype(v) >: Missing && !skipmissing ? [levels(v); missing] : levels(v), x)
+    lev = map(x) do v
+        eltype(v) >: Missing && !skipmissing ? [levels(v); missing] : allowmissing(levels(v))
+    end
     dims = map(length, lev)
     # First entry is for missing values (only correct and used if present)
     ord = map((v, d) -> Int[d; CategoricalArrays.order(v.pool)], x, dims)
@@ -121,7 +122,7 @@ function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool =
     missingpossible = any(miss)
 
     @inbounds for i in 1:len[1]
-        ref = x[1].refs[i]        
+        ref = x[1].refs[i]
         el = ord[1][ref + 1]
         anymiss = missingpossible & (ref <= 0)
 
@@ -141,7 +142,7 @@ end
 
 freqtable(x::AbstractCategoricalVector...; skipmissing::Bool = false,
           weights::AbstractVector{<:Real} = UnitWeights(),
-          subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
+          subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
     _freqtable(x, skipmissing, weights, subset)
 
 function freqtable(d::AbstractDataFrame, x::Symbol...; args...)
@@ -214,14 +215,13 @@ julia> sum(pt, (1, 2))
 
 ```
 """
-
 prop(tbl::AbstractArray{<:Number}) = tbl / sum(tbl)
 
 function prop(tbl::AbstractArray{<:Number,N}, margin::Integer...) where N
     lo, hi = extrema(margin)
     (lo < 1 || hi > N) && throw(ArgumentError("margin must be a valid dimension"))
-    tbl ./ sum(tbl, tuple(setdiff(1:N, margin)...))
+    tbl ./ sum(tbl, dims=tuple(setdiff(1:N, margin)...)::NTuple{N-length(margin),Int})
 end
 
 prop(tbl::NamedArray{<:Number}, margin::Integer...) =
-    NamedArray(prop(array(tbl), margin...), tbl.dicts, tbl.dimnames)
+    NamedArray(prop(convert(Array, tbl), margin...), tbl.dicts, tbl.dimnames)
diff --git a/test/freqtable.jl b/test/freqtable.jl
@@ -1,5 +1,5 @@
 using FreqTables
-using Base.Test
+using Test
 
 x = repeat(["a", "b", "c", "d"], outer=[100]);
 # Values not in order to test discrepancy between index and levels with CategoricalArray
@@ -8,7 +8,7 @@ y = repeat(["D", "C", "A", "B"], inner=[10], outer=[10]);
 tab = @inferred freqtable(x)
 @test tab == [100, 100, 100, 100]
 @test names(tab) == [["a", "b", "c", "d"]]
-@test prop(tab) == [0.25, 0.25, 0.25, 0.25]
+@test @inferred prop(tab) == [0.25, 0.25, 0.25, 0.25]
 tab = @inferred freqtable(y)
 @test tab == [100, 100, 100, 100]
 @test names(tab) == [["A", "B", "C", "D"]]
@@ -41,7 +41,7 @@ pt = @inferred prop(tab, 1, 2)
              1.0 1.0 1.0 1.0]
 
 tbl = @inferred prop(rand(5, 5, 5, 5), 1, 2)
-sumtbl = sum(tbl, (3,4))
+sumtbl = sum(tbl, dims=(3,4))
 @test all(x -> x ≈ 1.0, sumtbl)
 
 @test_throws MethodError prop()
@@ -51,9 +51,9 @@ sumtbl = sum(tbl, (3,4))
 @test_throws ArgumentError prop([1,2,3], 2)
 @test_throws ArgumentError prop([1,2,3], 0)
 
-tab =freqtable(x, y,
-               subset=1:20,
-               weights=repeat([1, .5], outer=[10]))
+tab = @inferred freqtable(x, y,
+                          subset=1:20,
+                          weights=repeat([1, .5], outer=[10]))
 @test tab == [2.0 3.0
               1.0 1.5
               3.0 2.0
@@ -85,36 +85,35 @@ tab = @inferred freqtable(cx, cy)
               20 30 30 20]
 @test names(tab) == [["a", "b", "c", "d"], ["A", "B", "C", "D"]]
 
-tab =freqtable(cx, cy,
-               subset=1:20,
-               weights=repeat([1, .5], outer=[10]))
+tab = @inferred freqtable(cx, cy,
+                          subset=1:20,
+                          weights=repeat([1, .5], outer=[10]))
 @test tab == [0.0 0.0 2.0 3.0
               0.0 0.0 1.0 1.5
               0.0 0.0 3.0 2.0
               0.0 0.0 1.5 1.0]
 @test names(tab) == [["a", "b", "c", "d"], ["A", "B", "C", "D"]]
 
 
-using Missings
 const ≅ = isequal
 mx = Array{Union{String, Missing}}(x)
 my = Array{Union{String, Missing}}(y)
 mx[1] = missing
-my[[1, 10, 20, 400]] = missing
+my[[1, 10, 20, 400]] .= missing
 
 mcx = categorical(mx)
 mcy = categorical(my)
 
-tab = freqtable(mx)
-tabc = freqtable(mcx)
+tab = @inferred freqtable(mx)
+tabc = @inferred freqtable(mcx)
 @test tab == tabc == [99, 100, 100, 100, 1]
 @test names(tab) ≅ names(tabc) ≅ [["a", "b", "c", "d", missing]]
-tab = freqtable(my)
-tabc = freqtable(mcy)
+tab = @inferred freqtable(my)
+tabc = @inferred freqtable(mcy)
 @test tab == tabc == [100, 99, 99, 98, 4]
 @test names(tab) ≅ names(tabc) ≅ [["A", "B", "C", "D", missing]]
-tab = freqtable(mx, my)
-tabc = freqtable(mcx, mcy)
+tab = @inferred freqtable(mx, my)
+tabc = @inferred freqtable(mcx, mcy)
 @test tab == tabc == [30 20 20 29 0;
                       30 20 20 29 1;
                       20 30 30 20 0;
@@ -124,16 +123,16 @@ tabc = freqtable(mcx, mcy)
                                   ["A", "B", "C", "D", missing]]
 
 
-tab = freqtable(mx, skipmissing=true)
-tabc = freqtable(mcx, skipmissing=true)
+tab = @inferred freqtable(mx, skipmissing=true)
+tabc = @inferred freqtable(mcx, skipmissing=true)
 @test tab == tabc == [99, 100, 100, 100]
 @test names(tab) ≅ names(tabc) ≅ [["a", "b", "c", "d"]]
-tab = freqtable(my, skipmissing=true)
-tabc = freqtable(mcy, skipmissing=true)
+tab = @inferred freqtable(my, skipmissing=true)
+tabc = @inferred freqtable(mcy, skipmissing=true)
 @test names(tab) ≅ names(tabc) ≅ [["A", "B", "C", "D"]]
 @test tab == tabc == [100, 99, 99, 98]
-tab = freqtable(mx, my, skipmissing=true)
-tabc = freqtable(mcx, mcy, skipmissing=true)
+tab = @inferred freqtable(mx, my, skipmissing=true)
+tabc = @inferred freqtable(mcx, mcy, skipmissing=true)
 @test tab == tabc == [30 20 20 29;
                       30 20 20 29;
                       20 30 30 20;
@@ -143,7 +142,9 @@ tabc = freqtable(mcx, mcy, skipmissing=true)
 using DataFrames, CSV
 
 for docat in [false, true]
-    iris = CSV.read(joinpath(Pkg.dir("DataFrames"), "test/data/iris.csv"), categorical=docat);
+    iris = CSV.read(joinpath(dirname(pathof(DataFrames)), "../test/data/iris.csv"),
+                    DataFrame,
+                    categorical=docat, allowmissing=:none);
     if docat
         iris[:LongSepal] = categorical(iris[:SepalLength] .> 5.0)
     else
@@ -167,8 +168,8 @@ for docat in [false, true]
 end
 
 # Issue #5
-@test freqtable([Set(1), Set(2)]) == [1, 1]
-@test freqtable([Set(1), Set(2)], [Set(1), Set(2)]) == eye(2)
+@test @inferred freqtable([Set(1), Set(2)]) == [1, 1]
+@test @inferred freqtable([Set(1), Set(2)], [Set(1), Set(2)]) == [1 0; 0 1]
 
 @test_throws ArgumentError freqtable()
 @test_throws ArgumentError freqtable(DataFrame())