Skip to content

Commit

Permalink
Port to Julia 0.7/1.0 (#28)
Browse files Browse the repository at this point in the history
Use the new Name wrapper from NamedArrays 0.9.0 when indexing to avoid ambiguity
with Integer names.
  • Loading branch information
nalimilan committed Sep 5, 2018
1 parent 45067fc commit b293d6a
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 49 deletions.
14 changes: 10 additions & 4 deletions .travis.yml
@@ -1,14 +1,20 @@
language: julia

os:
- osx
- linux
- osx

julia:
- 0.6
- 0.7
- 1.0
- nightly

notifications:
email: false

script:
- if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
- julia -e 'Pkg.clone(pwd()); Pkg.build("FreqTables"); Pkg.test("FreqTables"; coverage=true)';
- julia -e 'using Pkg; Pkg.clone(pwd()); Pkg.build("FreqTables"); Pkg.test("FreqTables"; coverage=true)';

after_success:
- julia -e 'cd(Pkg.dir("FreqTables")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
- julia -e 'using Pkg; cd(Pkg.dir("FreqTables")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -2,7 +2,7 @@

[![Build Status](https://travis-ci.org/nalimilan/FreqTables.jl.svg?branch=master)](https://travis-ci.org/nalimilan/FreqTables.jl)
[![Coverage Status](https://coveralls.io/repos/nalimilan/FreqTables.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/nalimilan/FreqTables.jl?branch=master)
[![FreqTables](http://pkg.julialang.org/badges/FreqTables_0.6.svg)](http://pkg.julialang.org/?pkg=FreqTables&ver=0.6)
[![FreqTables](http://pkg.julialang.org/badges/FreqTables_1.0.svg)](http://pkg.julialang.org/?pkg=FreqTables&ver=1.0)

This package allows computing one- or multi-way frequency tables (a.k.a. contingency or pivot tables) from
any type of vector or array. It includes support for [`CategoricalArray`](https://github.com/JuliaData/CategoricalArrays.jl)
Expand Down
4 changes: 2 additions & 2 deletions REQUIRE
@@ -1,4 +1,4 @@
julia 0.6
NamedArrays
julia 0.7
NamedArrays 0.9.1
CategoricalArrays 0.3.0
DataFrames 0.11.0
32 changes: 16 additions & 16 deletions src/freqtable.jl
@@ -1,8 +1,7 @@
import Base.ht_keyindex

# Cf. https://github.com/JuliaStats/StatsBase.jl/issues/135
immutable UnitWeights <: AbstractVector{Int}
end
struct UnitWeights <: AbstractVector{Int} end
Base.getindex(w::UnitWeights, ::Integer...) = 1
Base.getindex(w::UnitWeights, ::AbstractVector) = w

Expand All @@ -15,11 +14,11 @@ Base.@pure vectypes(T) = Tuple{map(U -> Vector{U}, T.parameters)...}
function _freqtable(x::Tuple,
skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing)
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing)
n = length(x)
n == 0 && throw(ArgumentError("at least one argument must be provided"))

if !isa(subset, Void)
if !isa(subset, Nothing)
x = map(y -> y[subset], x)
weights = weights[subset]
end
Expand Down Expand Up @@ -50,12 +49,12 @@ function _freqtable(x::Tuple,
end

if skipmissing
filter!((k, v) -> !any(ismissing, k), d)
filter!(p -> !any(ismissing, p[1]), d)
end

keyvec = collect(keys(d))

dimnames = Vector{Vector}(n)
dimnames = Vector{Vector}(undef, n)
for i in 1:n
s = Set{vtypes.parameters[i]}()
for j in 1:length(keyvec)
Expand All @@ -76,7 +75,7 @@ function _freqtable(x::Tuple,
na = NamedArray(a, tuple(dimnames...)::vectypes(vtypes), ntuple(i -> "Dim$i", n))

for (k, v) in d
na[k...] = v
na[Name.(k)...] = v
end

na
Expand All @@ -85,23 +84,25 @@ end
freqtable(x::AbstractVector...;
skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
_freqtable(x, skipmissing, weights, subset)

# Internal function needed for now so that n is inferred
function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) where n
n == 0 && throw(ArgumentError("at least one argument must be provided"))

if !isa(subset, Void)
if !isa(subset, Nothing)
x = map(y -> y[subset], x)
weights = weights[subset]
end

len = map(length, x)
miss = map(v -> eltype(v) >: Missing, x)
lev = map(v -> eltype(v) >: Missing && !skipmissing ? [levels(v); missing] : levels(v), x)
lev = map(x) do v
eltype(v) >: Missing && !skipmissing ? [levels(v); missing] : allowmissing(levels(v))
end
dims = map(length, lev)
# First entry is for missing values (only correct and used if present)
ord = map((v, d) -> Int[d; CategoricalArrays.order(v.pool)], x, dims)
Expand All @@ -121,7 +122,7 @@ function _freqtable(x::NTuple{n, AbstractCategoricalVector}, skipmissing::Bool =
missingpossible = any(miss)

@inbounds for i in 1:len[1]
ref = x[1].refs[i]
ref = x[1].refs[i]
el = ord[1][ref + 1]
anymiss = missingpossible & (ref <= 0)

Expand All @@ -141,7 +142,7 @@ end

freqtable(x::AbstractCategoricalVector...; skipmissing::Bool = false,
weights::AbstractVector{<:Real} = UnitWeights(),
subset::Union{Void, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
subset::Union{Nothing, AbstractVector{Int}, AbstractVector{Bool}} = nothing) =
_freqtable(x, skipmissing, weights, subset)

function freqtable(d::AbstractDataFrame, x::Symbol...; args...)
Expand Down Expand Up @@ -214,14 +215,13 @@ julia> sum(pt, (1, 2))
```
"""

prop(tbl::AbstractArray{<:Number}) = tbl / sum(tbl)

function prop(tbl::AbstractArray{<:Number,N}, margin::Integer...) where N
lo, hi = extrema(margin)
(lo < 1 || hi > N) && throw(ArgumentError("margin must be a valid dimension"))
tbl ./ sum(tbl, tuple(setdiff(1:N, margin)...))
tbl ./ sum(tbl, dims=tuple(setdiff(1:N, margin)...)::NTuple{N-length(margin),Int})
end

prop(tbl::NamedArray{<:Number}, margin::Integer...) =
NamedArray(prop(array(tbl), margin...), tbl.dicts, tbl.dimnames)
NamedArray(prop(convert(Array, tbl), margin...), tbl.dicts, tbl.dimnames)
53 changes: 27 additions & 26 deletions test/freqtable.jl
@@ -1,5 +1,5 @@
using FreqTables
using Base.Test
using Test

x = repeat(["a", "b", "c", "d"], outer=[100]);
# Values not in order to test discrepancy between index and levels with CategoricalArray
Expand All @@ -8,7 +8,7 @@ y = repeat(["D", "C", "A", "B"], inner=[10], outer=[10]);
tab = @inferred freqtable(x)
@test tab == [100, 100, 100, 100]
@test names(tab) == [["a", "b", "c", "d"]]
@test prop(tab) == [0.25, 0.25, 0.25, 0.25]
@test @inferred prop(tab) == [0.25, 0.25, 0.25, 0.25]
tab = @inferred freqtable(y)
@test tab == [100, 100, 100, 100]
@test names(tab) == [["A", "B", "C", "D"]]
Expand Down Expand Up @@ -41,7 +41,7 @@ pt = @inferred prop(tab, 1, 2)
1.0 1.0 1.0 1.0]

tbl = @inferred prop(rand(5, 5, 5, 5), 1, 2)
sumtbl = sum(tbl, (3,4))
sumtbl = sum(tbl, dims=(3,4))
@test all(x -> x ≈ 1.0, sumtbl)

@test_throws MethodError prop()
Expand All @@ -51,9 +51,9 @@ sumtbl = sum(tbl, (3,4))
@test_throws ArgumentError prop([1,2,3], 2)
@test_throws ArgumentError prop([1,2,3], 0)

tab =freqtable(x, y,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
tab = @inferred freqtable(x, y,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
@test tab == [2.0 3.0
1.0 1.5
3.0 2.0
Expand Down Expand Up @@ -85,36 +85,35 @@ tab = @inferred freqtable(cx, cy)
20 30 30 20]
@test names(tab) == [["a", "b", "c", "d"], ["A", "B", "C", "D"]]

tab =freqtable(cx, cy,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
tab = @inferred freqtable(cx, cy,
subset=1:20,
weights=repeat([1, .5], outer=[10]))
@test tab == [0.0 0.0 2.0 3.0
0.0 0.0 1.0 1.5
0.0 0.0 3.0 2.0
0.0 0.0 1.5 1.0]
@test names(tab) == [["a", "b", "c", "d"], ["A", "B", "C", "D"]]


using Missings
const ≅ = isequal
mx = Array{Union{String, Missing}}(x)
my = Array{Union{String, Missing}}(y)
mx[1] = missing
my[[1, 10, 20, 400]] = missing
my[[1, 10, 20, 400]] .= missing

mcx = categorical(mx)
mcy = categorical(my)

tab = freqtable(mx)
tabc = freqtable(mcx)
tab = @inferred freqtable(mx)
tabc = @inferred freqtable(mcx)
@test tab == tabc == [99, 100, 100, 100, 1]
@test names(tab) ≅ names(tabc) ≅ [["a", "b", "c", "d", missing]]
tab = freqtable(my)
tabc = freqtable(mcy)
tab = @inferred freqtable(my)
tabc = @inferred freqtable(mcy)
@test tab == tabc == [100, 99, 99, 98, 4]
@test names(tab) ≅ names(tabc) ≅ [["A", "B", "C", "D", missing]]
tab = freqtable(mx, my)
tabc = freqtable(mcx, mcy)
tab = @inferred freqtable(mx, my)
tabc = @inferred freqtable(mcx, mcy)
@test tab == tabc == [30 20 20 29 0;
30 20 20 29 1;
20 30 30 20 0;
Expand All @@ -124,16 +123,16 @@ tabc = freqtable(mcx, mcy)
["A", "B", "C", "D", missing]]


tab = freqtable(mx, skipmissing=true)
tabc = freqtable(mcx, skipmissing=true)
tab = @inferred freqtable(mx, skipmissing=true)
tabc = @inferred freqtable(mcx, skipmissing=true)
@test tab == tabc == [99, 100, 100, 100]
@test names(tab) ≅ names(tabc) ≅ [["a", "b", "c", "d"]]
tab = freqtable(my, skipmissing=true)
tabc = freqtable(mcy, skipmissing=true)
tab = @inferred freqtable(my, skipmissing=true)
tabc = @inferred freqtable(mcy, skipmissing=true)
@test names(tab) ≅ names(tabc) ≅ [["A", "B", "C", "D"]]
@test tab == tabc == [100, 99, 99, 98]
tab = freqtable(mx, my, skipmissing=true)
tabc = freqtable(mcx, mcy, skipmissing=true)
tab = @inferred freqtable(mx, my, skipmissing=true)
tabc = @inferred freqtable(mcx, mcy, skipmissing=true)
@test tab == tabc == [30 20 20 29;
30 20 20 29;
20 30 30 20;
Expand All @@ -143,7 +142,9 @@ tabc = freqtable(mcx, mcy, skipmissing=true)
using DataFrames, CSV

for docat in [false, true]
iris = CSV.read(joinpath(Pkg.dir("DataFrames"), "test/data/iris.csv"), categorical=docat);
iris = CSV.read(joinpath(dirname(pathof(DataFrames)), "../test/data/iris.csv"),
DataFrame,
categorical=docat, allowmissing=:none);
if docat
iris[:LongSepal] = categorical(iris[:SepalLength] .> 5.0)
else
Expand All @@ -167,8 +168,8 @@ for docat in [false, true]
end

# Issue #5
@test freqtable([Set(1), Set(2)]) == [1, 1]
@test freqtable([Set(1), Set(2)], [Set(1), Set(2)]) == eye(2)
@test @inferred freqtable([Set(1), Set(2)]) == [1, 1]
@test @inferred freqtable([Set(1), Set(2)], [Set(1), Set(2)]) == [1 0; 0 1]

@test_throws ArgumentError freqtable()
@test_throws ArgumentError freqtable(DataFrame())

0 comments on commit b293d6a

Please sign in to comment.