Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extend CategoricalArrays #716

Merged
merged 3 commits into from
May 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,19 @@ TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[weakdeps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"

[extensions]
DimensionalDataCategoricalArraysExt = "CategoricalArrays"
DimensionalDataMakie = "Makie"

[compat]
Adapt = "2, 3.0, 4"
Aqua = "0.8"
ArrayInterface = "7"
BenchmarkTools = "1"
CategoricalArrays = "0.10"
CairoMakie = "0.10, 0.11"
ColorTypes = "0.11"
Combinatorics = "1"
Expand Down Expand Up @@ -73,6 +76,7 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
CoordinateTransformations = "150eb455-5306-5404-9cee-2592286d6298"
Expand All @@ -91,4 +95,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"

[targets]
test = ["Aqua", "ArrayInterface", "BenchmarkTools", "ColorTypes", "Combinatorics", "CoordinateTransformations", "DataFrames", "Distributions", "Documenter", "ImageFiltering", "ImageTransformations", "CairoMakie", "OffsetArrays", "Plots", "Random", "SafeTestsets", "StatsPlots", "Test", "Unitful"]
test = ["Aqua", "ArrayInterface", "BenchmarkTools", "CategoricalArrays", "ColorTypes", "Combinatorics", "CoordinateTransformations", "DataFrames", "Distributions", "Documenter", "ImageFiltering", "ImageTransformations", "CairoMakie", "OffsetArrays", "Plots", "Random", "SafeTestsets", "StatsPlots", "Test", "Unitful"]
73 changes: 73 additions & 0 deletions ext/DimensionalDataCategoricalArraysExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
module DimensionalDataCategoricalArraysExt

import DimensionalData, CategoricalArrays
const DD = DimensionalData
const CAs = CategoricalArrays
# Alias for any dim array whose elements are categorical values (optionally `missing`).
# Declared `const` so the module-level binding has a fixed type and cannot be rebound.
const CategoricalDimArray = DD.AbstractDimArray{<:Union{Missing, CAs.CategoricalValue}}

# categorical and cut take a dimarray and return a categorical dim array
# Convert the data wrapped by `x` with `CategoricalArrays.categorical` and rebuild `x`
# around the result. Keyword arguments are forwarded unchanged.
CAs.categorical(x::DD.AbstractDimArray; kw...) =
    DD.rebuild(x; data = CAs.categorical(Base.parent(x); kw...))

# `cut` is defined once per second-argument type (number of groups vs. explicit breaks);
# the two methods need to be separate to avoid method ambiguity.
function CAs.cut(x::DD.AbstractDimArray, ng::Integer; kw...)
    binned = CAs.cut(Base.parent(x), ng; kw...)
    return DD.rebuild(x; data = binned)
end
function CAs.cut(x::DD.AbstractDimArray, breaks::AbstractVector; kw...)
    binned = CAs.cut(Base.parent(x), breaks; kw...)
    return DD.rebuild(x; data = binned)
end

# Without an explicit default, delegate to the method below with `nothing` as the default.
function CAs.recode(x::DD.AbstractDimArray, pairs::Pair...)
    return CAs.recode(x, nothing, pairs...)
end
# Recode the wrapped data and rebuild `x` around the recoded array.
function CAs.recode(x::DD.AbstractDimArray, default::Any, pairs::Pair...)
    recoded = CAs.recode(Base.parent(x), default, pairs...)
    return DD.rebuild(x; data = recoded)
end

# Functions that mutate in place: apply them to the wrapped categorical array and
# hand back the dim array that was passed in.
for f in (:levels!, :droplevels!, :fill!, :ordered!)
    @eval begin
        function CAs.$f(x::CategoricalDimArray, args...; kw...)
            CAs.$f(Base.parent(x), args...; kw...)
            return x
        end
    end
end

# Functions that produce a new categorical array: call them on the wrapped data and
# rebuild the dim array around the result.
for f in (:compress, :decompress)
    @eval function CAs.$f(x::CategoricalDimArray, args...; kw...)
        converted = CAs.$f(Base.parent(x), args...; kw...)
        return DD.rebuild(x; data = converted)
    end
end

# Functions that do not mutate: forward the call to the wrapped categorical array and
# return its answer as-is.
for f in (:levels, :leveltype, :pool, :refs, :isordered)
    @eval function CAs.$f(x::CategoricalDimArray, args...; kw...)
        return CAs.$f(Base.parent(x), args...; kw...)
    end
end

## recode! methods
# Methods without a default. Each dest/src combination is spelled out to avoid method
# ambiguity; all of them forward to the matching method with `nothing` as the default.
function CAs.recode!(dest::DD.AbstractDimArray, src::AbstractArray, pairs::Pair...)
    return CAs.recode!(dest, src, nothing, pairs...)
end
function CAs.recode!(dest::AbstractArray, src::DD.AbstractDimArray, pairs::Pair...)
    return CAs.recode!(dest, src, nothing, pairs...)
end
function CAs.recode!(dest::DD.AbstractDimArray, src::CAs.CategoricalArray, pairs::Pair...)
    return CAs.recode!(dest, src, nothing, pairs...)
end
function CAs.recode!(dest::CAs.CategoricalArray, src::DD.AbstractDimArray, pairs::Pair...)
    return CAs.recode!(dest, src, nothing, pairs...)
end
function CAs.recode!(dest::DD.AbstractDimArray, src::DD.AbstractDimArray, pairs::Pair...)
    return CAs.recode!(dest, src, nothing, pairs...)
end
# Methods with a single array: the array is used as both destination and source.
function CAs.recode!(a::DD.AbstractDimArray, default::Any, pairs::Pair...)
    return CAs.recode!(a, a, default, pairs...)
end
function CAs.recode!(a::DD.AbstractDimArray, pairs::Pair...)
    return CAs.recode!(a, a, nothing, pairs...)
end

# Methods with a default. Whichever side is a dim array is unwrapped to its parent
# before calling CategoricalArrays, and the original `dest` is returned.

# dim array <- plain array
function CAs.recode!(dest::DD.AbstractDimArray, src::AbstractArray, default, pairs::Pair...)
    target = Base.parent(dest)
    CAs.recode!(target, src, default, pairs...)
    return dest
end
# plain array <- dim array
function CAs.recode!(dest::AbstractArray, src::DD.AbstractDimArray, default, pairs::Pair...)
    source = Base.parent(src)
    CAs.recode!(dest, source, default, pairs...)
    return dest
end
# dim array <- categorical array
function CAs.recode!(dest::DD.AbstractDimArray, src::CAs.CategoricalArray, default, pairs::Pair...)
    target = Base.parent(dest)
    CAs.recode!(target, src, default, pairs...)
    return dest
end
# categorical array <- dim array
function CAs.recode!(dest::CAs.CategoricalArray, src::DD.AbstractDimArray, default, pairs::Pair...)
    source = Base.parent(src)
    CAs.recode!(dest, source, default, pairs...)
    return dest
end
# dim array <- dim array, with a default.
# Both sides are unwrapped to their parent arrays before calling CategoricalArrays.
# `default` must be forwarded explicitly: leaving it out dispatches to the no-default
# method, so a caller-supplied default would be silently ignored. The single-array
# `recode!(a, default, pairs...)` form also ends up here.
# Returns `dest` (the dim array), not its parent.
function CAs.recode!(dest::DD.AbstractDimArray, src::DD.AbstractDimArray, default, pairs::Pair...)
    CAs.recode!(Base.parent(dest), Base.parent(src), default, pairs...)
    return dest
end

end
106 changes: 106 additions & 0 deletions test/categorical.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
using DimensionalData, CategoricalArrays

# Fixture without missing values, made categorical with one level (4) that no element uses.
x = DimArray([1, 2, 3], X(1:3))
c = categorical(x; levels = [1,2,3,4])
@test c isa DimArray

# Same fixture with a trailing `missing`.
x2 = DimArray([1, 2, 3, missing], X(1:4))
c2 = categorical(x2; levels = [1,2,3,4])
@test c2 isa DimArray

@testset "compress" begin
    # Element type of the reference codes backing a categorical dim array.
    refs_eltype(A) = eltype(CategoricalArrays.refs(A))

    # Compressing keeps the DimArray wrapper and yields UInt8 reference codes.
    packed = compress(c)
    @test packed isa DimArray
    @test refs_eltype(packed) == UInt8

    # Decompressing keeps the wrapper too and yields UInt32 reference codes.
    unpacked = decompress(packed)
    @test unpacked isa DimArray
    @test refs_eltype(unpacked) == UInt32
end

@testset "levels" begin
    # `Int`, not `Int64`: the fixtures are built from integer literals, so the level
    # type follows the platform word size (`Int32` on 32-bit systems).
    @test CategoricalArrays.leveltype(c) == Int
    @test CategoricalArrays.leveltype(c2) == Int
    @test levels(c) == levels(c2) == [1,2,3,4]
    # Dropping unused levels removes level 4, which no element uses.
    droplevels!(c)
    droplevels!(c2)
    @test levels(c) == levels(c2) == [1,2,3]
    # `levels!` restores the full set and returns the dim array it was given.
    c3 = levels!(c, [1,2,3,4])
    levels!(c2, [1,2,3,4])
    @test levels(c) == levels(c2) == [1,2,3,4]
    @test c3 === c

    @test !isordered(c)
    ordered!(c, true)
    @test isordered(c)

    # `fill!` returns the dim array, so its result can be piped into `droplevels!`.
    fill!(c2, 1) |> droplevels!
    @test levels(c2) == [1]
end

# Exercises `recode` and `recode!` for combinations of dim-array and plain-array
# destination and source, with and without a default value.
@testset "recode" begin
# Categorical versions of the fixtures, built with default levels
c = categorical(x)
c2 = categorical(x2)
# on a normal dim array
rc1 = recode(x, 1 => 2)
@test rc1 == [2,2,3]
@test rc1 isa DimArray
# with a default: values not matched by a pair are replaced by the default (2)
rc2 = recode(x, 2, 3 => 4)
@test rc2 == [2,2,4]
@test rc2 isa DimArray
# on a categorical dim array
rc3 = recode(c, 1 => 2)
@test rc3 == [2,2,3]
@test rc3 isa DimArray

# in-place, single array: `c` is both source and destination
recode!(c, 1 => 2)
@test c == [2,2,3]

# in-place, from one categorical dim array into another
c3 = categorical(x)
recode!(c3, c, 2 => 3)
@test c3 == [3,3,3]

# from a dim array to a normal array
c = categorical(x)
A = categorical([1,2,2])
# source is a categorical dim array
recode!(A, c, 3 => 2)
@test A == [1,2,2]
# source is a plain (non-categorical) dim array
recode!(A, x, 2 => 1, 3 => 2)
@test A == [1,1,2]

# with a default: unmatched values become 3
recode!(A, c, 3, 2 => 1)
@test A == [3,1,3]
recode!(A, x, 3, 2 => 1)
@test A == [3,1,3]

## from an array to a dim array
A = categorical([1,2,3])
# `recode!` returns the destination dim array itself
rc = recode!(c3, A, 2 => 3)
@test c3 == [1,3,3]
@test c3 isa DimArray
@test rc === c3
# destination is the plain dim array `x`; this overwrites its contents
recode!(x, A, 2 => 3)
@test x == [1,3,3]
# with a default
recode!(c3, A, 2, 2 => 3)
@test c3 == [2,3,2]
recode!(x, A, 2, 2 => 3)
@test x == [2,3,2]
end

@testset "cut" begin
    vals = DimArray([0.0, 0.2, 0.4, 0.6], X(1:4))

    # Cutting into a fixed number of groups.
    halves = cut(vals, 2)
    @test halves isa DimArray{<:CategoricalArrays.CategoricalValue}
    @test length(levels(halves)) == 2
    @test all(CategoricalArrays.refs(halves) .== [1,1,2,2])

    # Cutting at explicit breaks with `extend = missing`: the first element comes out
    # as `missing` (reference code 0).
    binned = cut(vals, [0.1, 0.5, 1.0]; extend = missing)
    @test binned isa DimArray{<:Union{Missing, <:CategoricalArrays.CategoricalValue}}
    @test length(levels(binned)) == 2
    @test all(CategoricalArrays.refs(binned) .== [0,1,1,2])
    @test ismissing(first(binned))
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ using DimensionalData, Test, Aqua, SafeTestsets
@time @safetestset "show" begin include("show.jl") end
@time @safetestset "adapt" begin include("adapt.jl") end
@time @safetestset "ecosystem" begin include("ecosystem.jl") end
@time @safetestset "categorical" begin include("categorical.jl") end
if Sys.islinux()
# Unfortunately this can hang on other platforms.
# Maybe ram use of all the plots on the small CI machine? idk
Expand Down
Loading