Permalink
Browse files

Port to Julia 0.7/1.0 (#28)

Use the new Name wrapper from NamedArrays 0.9.0 when indexing to avoid ambiguity
with Integer names.
  • Loading branch information...
nalimilan committed Sep 5, 2018
1 parent 45067fc commit b293d6ab8d3d62b8efe0e5c17d1e87e761a07a76
Showing with 56 additions and 49 deletions.
  1. +10 −4 .travis.yml
  2. +1 −1 README.md
  3. +2 −2 REQUIRE
  4. +16 −16 src/freqtable.jl
  5. +27 −26 test/freqtable.jl
View
@@ -1,14 +1,20 @@
language: julia
os:
- osx
- linux
- osx
julia:
- 0.6
- 0.7
- 1.0
- nightly
notifications:
email: false
script:
- if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
- julia -e 'Pkg.clone(pwd()); Pkg.build("FreqTables"); Pkg.test("FreqTables"; coverage=true)';
- julia -e 'using Pkg; Pkg.clone(pwd()); Pkg.build("FreqTables"); Pkg.test("FreqTables"; coverage=true)';
after_success:
- julia -e 'cd(Pkg.dir("FreqTables")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
- julia -e 'using Pkg; cd(Pkg.dir("FreqTables")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
View
@@ -2,7 +2,7 @@
[![Build Status](https://travis-ci.org/nalimilan/FreqTables.jl.svg?branch=master)](https://travis-ci.org/nalimilan/FreqTables.jl)
[![Coverage Status](https://coveralls.io/repos/nalimilan/FreqTables.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/nalimilan/FreqTables.jl?branch=master)
[![FreqTables](http://pkg.julialang.org/badges/FreqTables_0.6.svg)](http://pkg.julialang.org/?pkg=FreqTables&ver=0.6)
[![FreqTables](http://pkg.julialang.org/badges/FreqTables_1.0.svg)](http://pkg.julialang.org/?pkg=FreqTables&ver=1.0)
This package allows computing one- or multi-way frequency tables (a.k.a. contingency or pivot tables) from
any type of vector or array. It includes support for [`CategoricalArray`](https://github.com/JuliaData/CategoricalArrays.jl)
View
@@ -1,4 +1,4 @@
julia 0.6
NamedArrays
julia 0.7
NamedArrays 0.9.1
CategoricalArrays 0.3.0
DataFrames 0.11.0
View
@@ -1,8 +1,7 @@
import Base.ht_keyindex
# Cf. https://github.com/JuliaStats/StatsBase.jl/issues/135
immutable UnitWeights <: AbstractVector{Int}
end
struct UnitWeights <: AbstractVector{Int} end
Base.getindex(w::UnitWeights, ::Integer...) = 1
Base.getindex(w::UnitWeights, ::AbstractVector) = w
@@ -15,11 +14,11 @@ Base.@pure vectypes(T) = Tuple{map(U -> Vector{U}, T.parameters)...}
function _freqtable(x::Tuple,
skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing)
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing)
n = length(x)
n == 0 && throw(ArgumentError("at least one argument must be provided"))
if !isa(subset, Void)
if !isa(subset, Nothing)
x = map(y -> y[subset], x)
weights = weights[subset]
end
@@ -50,12 +49,12 @@ function _freqtable(x::Tuple,
end
if skipmissing
filter!((k, v) -> !any(ismissing, k), d)
filter!(p -> !any(ismissing, p[1]), d)
end
keyvec = collect(keys(d))
dimnames = Vector{Vector}(n)
dimnames = Vector{Vector}(undef, n)
for i in 1:n
s = Set{vtypes.parameters[i]}()
for j in 1:length(keyvec)
@@ -76,7 +75,7 @@ function _freqtable(x::Tuple,
na = NamedArray(a, tuple(dimnames...)::vectypes(vtypes), ntuple(i -> "Dim$i", n))
for (k, v) in d
na[k...] = v
na[Name.(k)...] = v
end
na
@@ -85,23 +84,25 @@ end
freqtable(x::AbstractVector...;
skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
_freqtable(x, skipmissing, weights, subset)
# Internal function needed for now so that n is inferred
function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n
n == 0 && throw(ArgumentError("at least one argument must be provided"))
if !isa(subset, Void)
if !isa(subset, Nothing)
x = map(y -> y[subset], x)
weights = weights[subset]
end
len = map(length, x)
miss = map(v -> eltype(v) >: Missing, x)
lev = map(v -> eltype(v) >: Missing && !skipmissing ? [levels(v); missing] : levels(v), x)
lev = map(x) do v
eltype(v) >: Missing && !skipmissing ? [levels(v); missing] : allowmissing(levels(v))
end
dims = map(length, lev)
# First entry is for missing values (only correct and used if present)
ord = map((v, d) -> Int[d; CategoricalArrays.order(v.pool)], x, dims)
@@ -121,7 +122,7 @@ function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool =
missingpossible = any(miss)
@inbounds for i in 1:len[1]
ref = x[1].refs[i]
ref = x[1].refs[i]
el = ord[1][ref + 1]
anymiss = missingpossible & (ref <= 0)
@@ -141,7 +142,7 @@ end
freqtable(x::AbstractCategoricalVector...; skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
_freqtable(x, skipmissing, weights, subset)
function freqtable(d::AbstractDataFrame, x::Symbol...; args...)
@@ -214,14 +215,13 @@ julia> sum(pt, (1, 2))
```
"""
prop(tbl::AbstractArray{<:Number}) = tbl / sum(tbl)
function prop(tbl::AbstractArray{<:Number,N}, margin::Integer...) where N
lo, hi = extrema(margin)
(lo < 1 || hi > N) && throw(ArgumentError("margin must be a valid dimension"))
tbl ./ sum(tbl, tuple(setdiff(1:N, margin)...))
tbl ./ sum(tbl, dims=tuple(setdiff(1:N, margin)...)::NTuple{N-length(margin),Int})
end
prop(tbl::NamedArray{<:Number}, margin::Integer...) =
NamedArray(prop(array(tbl), margin...), tbl.dicts, tbl.dimnames)
NamedArray(prop(convert(Array, tbl), margin...), tbl.dicts, tbl.dimnames)
View
@@ -1,5 +1,5 @@
using FreqTables
using Base.Test
using Test
x = repeat(["a", "b", "c", "d"], outer=[100]);
# Values not in order to test discrepancy between index and levels with CategoricalArray
@@ -8,7 +8,7 @@ y = repeat(["D", "C", "A", "B"], inner=[10], outer=[10]);
tab = @inferred freqtable(x)
@test tab == [100, 100, 100, 100]
@test names(tab) == [["a", "b", "c", "d"]]
@test prop(tab) == [0.25, 0.25, 0.25, 0.25]
@test @inferred prop(tab) == [0.25, 0.25, 0.25, 0.25]
tab = @inferred freqtable(y)
@test tab == [100, 100, 100, 100]
@test names(tab) == [["A", "B", "C", "D"]]
@@ -41,7 +41,7 @@ pt = @inferred prop(tab, 1, 2)
1.0 1.0 1.0 1.0]
tbl = @inferred prop(rand(5, 5, 5, 5), 1, 2)
sumtbl = sum(tbl, (3,4))
sumtbl = sum(tbl, dims=(3,4))
@test all(x -> x ≈ 1.0, sumtbl)
@test_throws MethodError prop()
@@ -51,9 +51,9 @@ sumtbl = sum(tbl, (3,4))
@test_throws ArgumentError prop([1,2,3], 2)
@test_throws ArgumentError prop([1,2,3], 0)
tab =freqtable(x, y,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
tab = @inferred freqtable(x, y,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
@test tab == [2.0 3.0
1.0 1.5
3.0 2.0
@@ -85,36 +85,35 @@ tab = @inferred freqtable(cx, cy)
20 30 30 20]
@test names(tab) == [["a", "b", "c", "d"], ["A", "B", "C", "D"]]
tab =freqtable(cx, cy,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
tab = @inferred freqtable(cx, cy,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
@test tab == [0.0 0.0 2.0 3.0
0.0 0.0 1.0 1.5
0.0 0.0 3.0 2.0
0.0 0.0 1.5 1.0]
@test names(tab) == [["a", "b", "c", "d"], ["A", "B", "C", "D"]]
using Missings
const ≅ = isequal
mx = Array{Union{String, Missing}}(x)
my = Array{Union{String, Missing}}(y)
mx[1] = missing
my[[1, 10, 20, 400]] = missing
my[[1, 10, 20, 400]] .= missing
mcx = categorical(mx)
mcy = categorical(my)
tab = freqtable(mx)
tabc = freqtable(mcx)
tab = @inferred freqtable(mx)
tabc = @inferred freqtable(mcx)
@test tab == tabc == [99, 100, 100, 100, 1]
@test names(tab) ≅ names(tabc) ≅ [["a", "b", "c", "d", missing]]
tab = freqtable(my)
tabc = freqtable(mcy)
tab = @inferred freqtable(my)
tabc = @inferred freqtable(mcy)
@test tab == tabc == [100, 99, 99, 98, 4]
@test names(tab) ≅ names(tabc) ≅ [["A", "B", "C", "D", missing]]
tab = freqtable(mx, my)
tabc = freqtable(mcx, mcy)
tab = @inferred freqtable(mx, my)
tabc = @inferred freqtable(mcx, mcy)
@test tab == tabc == [30 20 20 29 0;
30 20 20 29 1;
20 30 30 20 0;
@@ -124,16 +123,16 @@ tabc = freqtable(mcx, mcy)
["A", "B", "C", "D", missing]]
tab = freqtable(mx, skipmissing=true)
tabc = freqtable(mcx, skipmissing=true)
tab = @inferred freqtable(mx, skipmissing=true)
tabc = @inferred freqtable(mcx, skipmissing=true)
@test tab == tabc == [99, 100, 100, 100]
@test names(tab) ≅ names(tabc) ≅ [["a", "b", "c", "d"]]
tab = freqtable(my, skipmissing=true)
tabc = freqtable(mcy, skipmissing=true)
tab = @inferred freqtable(my, skipmissing=true)
tabc = @inferred freqtable(mcy, skipmissing=true)
@test names(tab) ≅ names(tabc) ≅ [["A", "B", "C", "D"]]
@test tab == tabc == [100, 99, 99, 98]
tab = freqtable(mx, my, skipmissing=true)
tabc = freqtable(mcx, mcy, skipmissing=true)
tab = @inferred freqtable(mx, my, skipmissing=true)
tabc = @inferred freqtable(mcx, mcy, skipmissing=true)
@test tab == tabc == [30 20 20 29;
30 20 20 29;
20 30 30 20;
@@ -143,7 +142,9 @@ tabc = freqtable(mcx, mcy, skipmissing=true)
using DataFrames, CSV
for docat in [false, true]
iris = CSV.read(joinpath(Pkg.dir("DataFrames"), "test/data/iris.csv"), categorical=docat);
iris = CSV.read(joinpath(dirname(pathof(DataFrames)), "../test/data/iris.csv"),
DataFrame,
categorical=docat, allowmissing=:none);
if docat
iris[:LongSepal] = categorical(iris[:SepalLength] .> 5.0)
else
@@ -167,8 +168,8 @@ for docat in [false, true]
end
# Issue #5
@test freqtable([Set(1), Set(2)]) == [1, 1]
@test freqtable([Set(1), Set(2)], [Set(1), Set(2)]) == eye(2)
@test @inferred freqtable([Set(1), Set(2)]) == [1, 1]
@test @inferred freqtable([Set(1), Set(2)], [Set(1), Set(2)]) == [1 0; 0 1]
@test_throws ArgumentError freqtable()
@test_throws ArgumentError freqtable(DataFrame())

0 comments on commit b293d6a

Please sign in to comment.