Skip to content

Commit

Permalink
Merge pull request JuliaLang#19 from JuliaStats/array
Browse files Browse the repository at this point in the history
[WIP] Consolidate functionality under new array function
  • Loading branch information
johnmyleswhite committed Dec 1, 2013
2 parents 96a6241 + 1c710ab commit daeaae9
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 133 deletions.
3 changes: 1 addition & 2 deletions src/DataArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module DataArrays
AbstractDataVector,
allna,
anyna,
array,
autocor,
compact,
cut,
Expand All @@ -37,7 +38,6 @@ module DataArrays
isna,
level_to_index,
levels,
matrix,
NA,
NAException,
NAtype,
Expand All @@ -60,7 +60,6 @@ module DataArrays
set_levels!,
set_levels,
tail,
vector,
xtab,
xtabs

Expand Down
147 changes: 40 additions & 107 deletions src/dataarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,8 @@ DataArray{N}(t::Type, dims::NTuple{N,Int}) = DataArray(Array(t, dims...),
# Copy a DataArray: both the data array and the NA mask are duplicated with copy().
Base.copy(d::DataArray) = DataArray(copy(d.data), copy(d.na))
# Deep-copy a DataArray: the data array and the NA mask are each deep-copied.
Base.deepcopy(d::DataArray) = DataArray(deepcopy(d.data), deepcopy(d.na))
function Base.copy!(dest::DataArray, src::Any)
i = 1
for x in src
dest[i] = x
i += 1
for i in 1:length(src)
dest[i] = src[i]
end
return dest
end
Expand All @@ -66,43 +64,36 @@ Base.length(d::DataArray) = length(d.data)
# Last linear index of a DataArray, delegated to the underlying data array.
Base.endof(d::DataArray) = endof(d.data)
# Element type of a DataArray is its first type parameter T.
Base.eltype{T, N}(d::DataArray{T, N}) = T

# Dealing with NA's
function failNA(da::DataArray)
if anyna(da)
throw(NAException())
else
return copy(da.data)
# Turn a DataArray into an Array. Fail on NA
function array{T}(da::DataArray{T})
n = length(da)
res = Array(T, size(da))
for i in 1:n
if da.na[i]
error(NAException())
else
res[i] = da.data[i]
end
end
return res
end

# Return a new Vector holding the non-NA entries of `da`, in linear-index order.
# NB: Can do strange things on DataArray of rank > 1: indexing with the
# logical mask produces a flat vector, so the original shape is lost.
function removeNA(da::DataArray)
    # Indexing with the negated NA mask already allocates a fresh array, so an
    # additional copy() would only duplicate it a second time.
    return da.data[!da.na]
end

function replaceNA(da::DataArray, replacement_val::Any)
res = copy(da.data)
for i in 1:length(da)
function array{T}(da::DataArray{T}, replacement::T)
n = length(da)
res = Array(T, size(da))
for i in 1:n
if da.na[i]
res[i] = replacement_val
res[i] = replacement
else
res[i] = da.data[i]
end
end
return res
end

# Curried form: returns a one-argument function that calls
# replaceNA(x, replacement_val) on whatever `x` it is given.
replaceNA(replacement_val::Any) = x -> replaceNA(x, replacement_val)

# TODO: Re-implement these methods for PooledDataArray's
function failNA{T}(da::AbstractDataArray{T})
if anyna(da)
throw(NAException())
else
res = Array(T, size(da))
for i in 1:length(da)
res[i] = da[i]
end
return res
end
# Return a new Vector holding the non-NA entries of `da`, in linear-index order.
# NB: Can do strange things on DataArray of rank > 1: indexing with the
# logical mask produces a flat vector, so the original shape is lost.
function removeNA(da::DataArray)
    # Indexing with the negated NA mask already allocates a fresh array, so an
    # additional copy() would only duplicate it a second time.
    return da.data[!da.na]
end

# TODO: Figure out how to make this work for Array's
Expand All @@ -121,20 +112,7 @@ end

# Fallback for other arrays: returns `a` itself (not a copy) without
# inspecting its elements.
removeNA(a::AbstractArray) = a

# Copy an AbstractDataArray into a plain Array{S} of the same size, writing
# `replacement_val` wherever an element is NA.
function replaceNA{S, T}(da::AbstractDataArray{S}, replacement_val::T)
    out = Array(S, size(da))
    for k in 1:length(da)
        out[k] = isna(da[k]) ? replacement_val : da[k]
    end
    return out
end

# Iterators

# Iterator wrapper that holds the data array to be traversed.
# NOTE(review): judging by the name, iteration presumably fails when an NA is
# encountered — confirm against the iterator methods, which are not shown here.
type EachFailNA{T}
da::AbstractDataArray{T}
end
Expand Down Expand Up @@ -199,19 +177,19 @@ typealias BooleanIndex Union(BitVector, Vector{Bool})
# v[dv]
# Index a Vector with a Bool data vector. NA entries in `inds` are replaced by
# false, so the corresponding positions are not selected.
function Base.getindex(x::Vector,
inds::AbstractDataVector{Bool})
# NOTE(review): the two return statements below appear to be the pre-change
# and post-change versions of the same line. The second passes
# `replace = false` as a keyword, whereas the `array` methods defined in this
# change take the replacement as a second positional argument — confirm a
# keyword method exists, or call `array(inds, false)` instead.
return x[find(replaceNA(inds, false))]
return x[find(array(inds, replace = false))]
end
# Index a Vector with a Bool data array. NA entries in `inds` are replaced by
# false, so the corresponding positions are not selected.
function Base.getindex(x::Vector,
inds::AbstractDataArray{Bool})
# NOTE(review): the two return statements below appear to be the pre-change
# and post-change versions of the same line. The second passes
# `replace = false` as a keyword, whereas the `array` methods defined in this
# change take the replacement as a second positional argument — confirm a
# keyword method exists, or call `array(inds, false)` instead.
return x[find(replaceNA(inds, false))]
return x[find(array(inds, replace = false))]
end
# Index an Array with a Bool data vector. NA entries in `inds` are replaced by
# false, so the corresponding positions are not selected.
function Base.getindex(x::Array,
inds::AbstractDataVector{Bool})
# NOTE(review): the two return statements below appear to be the pre-change
# and post-change versions of the same line. The second passes
# `replace = false` as a keyword, whereas the `array` methods defined in this
# change take the replacement as a second positional argument — confirm a
# keyword method exists, or call `array(inds, false)` instead.
return x[find(replaceNA(inds, false))]
return x[find(array(inds, replace = false))]
end
# Index an Array with a Bool data array. NA entries in `inds` are replaced by
# false, so the corresponding positions are not selected.
function Base.getindex(x::Array,
inds::AbstractDataArray{Bool})
# NOTE(review): the two return statements below appear to be the pre-change
# and post-change versions of the same line. The second passes
# `replace = false` as a keyword, whereas the `array` methods defined in this
# change take the replacement as a second positional argument — confirm a
# keyword method exists, or call `array(inds, false)` instead.
return x[find(replaceNA(inds, false))]
return x[find(array(inds, replace = false))]
end
function Base.getindex{S, T}(x::Vector{S},
inds::AbstractDataArray{T})
Expand All @@ -235,7 +213,7 @@ end
# TODO: Return SubDataArray
# Index a DataArray with a Bool data vector. NA entries in `inds` are replaced
# by false, the positions of the remaining true entries are collected with
# find(), and `d` is then indexed with those positions.
function Base.getindex(d::DataArray,
inds::AbstractDataVector{Bool})
# NOTE(review): the two assignments below appear to be the pre-change and
# post-change versions of the same line; only one of them should remain. The
# second passes `replace = false` as a keyword, whereas the `array` methods
# defined in this change take the replacement as a second positional
# argument — confirm a keyword method exists, or call `array(inds, false)`.
inds = find(replaceNA(inds, false))
inds = find(array(inds, replace = false))
return d[inds]
end
function Base.getindex(d::DataArray,
Expand Down Expand Up @@ -446,10 +424,22 @@ function Base.convert{S, T, N}(::Type{Array{S, N}}, x::DataArray{T, N})
end
end

# Wrap a plain Array in a DataArray{S, N}: the elements are converted to S and
# the NA mask is all false (no entry is NA).
function Base.convert{S, T, N}(::Type{DataArray{S, N}}, x::Array{T, N})
    vals = convert(Array{S}, x)
    mask = falses(size(x))
    return DataArray(vals, mask)
end

# Wrap a plain Array in a DataArray, keeping its element type. `x` itself is
# handed to the constructor and the NA mask is all false.
function Base.convert{T, N}(::Type{DataArray}, x::Array{T, N})
    mask = falses(size(x))
    return DataArray(x, mask)
end

# Re-type a DataArray to element type S. The data are converted to Array{S};
# the NA mask `x.na` is handed to the constructor as-is rather than copied
# here.
function Base.convert{S, T, N}(::Type{DataArray{S, N}}, x::DataArray{T, N})
    vals = convert(Array{S}, x.data)
    return DataArray(vals, x.na)
end

# Conversion of a DataArray to the unparameterized DataArray type: builds a
# DataArray from the same data array and NA mask, neither of which is copied
# here.
function Base.convert{T, N}(::Type{DataArray}, x::DataArray{T, N})
    vals, mask = x.data, x.na
    return DataArray(vals, mask)
end

# Conversion convenience functions

# TODO: Make sure these handle copying correctly
Expand Down Expand Up @@ -482,63 +472,6 @@ for (f, basef) in ((:dataint, :int),
end
end

# Conversion to Array

# TODO: Review these
# Copy a data vector into a plain Array with element type `t`, writing
# `replacement_val` wherever an entry is NA.
function vector(adv::AbstractDataVector, t::Type, replacement_val::Any)
    len = length(adv)
    out = Array(t, len)
    for k in 1:len
        out[k] = isna(adv[k]) ? replacement_val : adv[k]
    end
    return out
end

# Copy a data vector element by element into a plain Array with element
# type `t`. NA entries get no special treatment: each element is assigned
# as-is.
function vector(adv::AbstractDataVector, t::Type)
    len = length(adv)
    out = Array(t, len)
    k = 1
    while k <= len
        out[k] = adv[k]
        k += 1
    end
    return out
end

# Convenience form: convert a data vector to a plain Array using its own
# element type T.
vector{T}(adv::AbstractDataVector{T}) = vector(adv, T)

# A plain Vector is returned unchanged (the same object, not a copy).
vector{T}(v::Vector{T}) = v

# Copy a data matrix into a plain Array with element type `t`, writing
# `replacement_val` wherever an entry is NA.
function matrix(adm::AbstractDataMatrix, t::Type, replacement_val::Any)
    nrows, ncols = size(adm)
    out = Array(t, nrows, ncols)
    for r in 1:nrows, c in 1:ncols
        out[r, c] = isna(adm[r, c]) ? replacement_val : adm[r, c]
    end
    return out
end

# Copy a data matrix element by element into a plain Array with element
# type `t`. NA entries get no special treatment: each element is assigned
# as-is.
function matrix(adm::AbstractDataMatrix, t::Type)
    nrows, ncols = size(adm)
    out = Array(t, nrows, ncols)
    for r in 1:nrows, c in 1:ncols
        out[r, c] = adm[r, c]
    end
    return out
end

# Convenience form: convert a data matrix to a plain Array using its own
# element type T.
matrix{T}(adm::AbstractDataMatrix{T}) = matrix(adm, T)

# Hashing
# TODO: Make sure this agrees with is_equals()

Expand Down
59 changes: 59 additions & 0 deletions src/pooleddataarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -800,3 +800,62 @@ function PooledDataVecs(v1::AbstractArray,
return (PooledDataArray(RefArray(refs1), pool),
PooledDataArray(RefArray(refs2), pool))
end

# Build a PooledDataArray from a plain Array: the elements are converted to S
# and the NA mask passed to the constructor is all false.
# NOTE(review): `N` is used both as the dimension count of `x` and as the
# second type parameter of PooledDataArray, which `array{T, R}` in this file
# treats as the reference type R. If so, this signature can never match —
# confirm the parameter order of PooledDataArray.
function Base.convert{S, T, N}(::Type{PooledDataArray{S, N}}, x::Array{T, N})
return PooledDataArray(convert(Array{S}, x), falses(size(x)))
end

# Build a PooledDataArray from a plain Array, keeping its element type. The
# NA mask passed to the constructor is all false.
function Base.convert{T, N}(::Type{PooledDataArray}, x::Array{T, N})
    mask = falses(size(x))
    return PooledDataArray(x, mask)
end

# Build a PooledDataArray from a DataArray: the data are converted to
# Array{S} and the DataArray's NA mask is passed to the constructor.
# NOTE(review): `N` is used both as the dimension count of `x` and as the
# second type parameter of PooledDataArray, which `array{T, R}` in this file
# treats as the reference type R. If so, this signature can never match —
# confirm the parameter order of PooledDataArray.
function Base.convert{S, T, N}(::Type{PooledDataArray{S, N}}, x::DataArray{T, N})
return PooledDataArray(convert(Array{S}, x.data), x.na)
end

# Build a PooledDataArray from a DataArray, keeping its element type. The
# DataArray's data array and NA mask are passed to the constructor.
function Base.convert{T, N}(::Type{PooledDataArray}, x::DataArray{T, N})
    vals, mask = x.data, x.na
    return PooledDataArray(vals, mask)
end

# Re-type a PooledDataArray so that its pool holds elements of type S.
#
# A PooledDataArray is stored as `refs` plus `pool` (see `array` in this
# file); it is not accessed through `data`/`na` fields, so the conversion is
# done on the pool. The existing references are reused without copying, which
# keeps NA entries (reference zero) and the level order intact.
#
# The second type parameter is named R here because `array{T, R}` in this
# file treats it as the reference type; the set of types matched is the same
# as before.
# NOTE(review): if converting to S maps two distinct levels to the same value
# the resulting pool will contain duplicates — confirm whether that matters.
function Base.convert{S, T, R}(::Type{PooledDataArray{S, R}}, x::PooledDataArray{T, R})
    newpool = convert(Array{S}, x.pool)
    return PooledDataArray(RefArray(x.refs), newpool)
end

# Converting a PooledDataArray to the unparameterized PooledDataArray type is
# a no-op: `x` already is one, so it is returned unchanged.
#
# The previous body read `x.data` and `x.na`, but a PooledDataArray is stored
# as `refs` plus `pool` (see `array` in this file).
function Base.convert{T, N}(::Type{PooledDataArray}, x::PooledDataArray{T, N})
    return x
end

# Expand a PooledDataArray into a DataArray{S, N}.
#
# Every non-NA entry is looked up in the pool and stored as an S; entries
# whose reference is zero(R) are NA and are flagged in the NA mask instead
# (their data slot is left unset).
#
# The previous body returned a PooledDataArray rather than a DataArray and
# read `x.data`/`x.na`, whereas a PooledDataArray is stored as `refs` plus
# `pool` (see `array` in this file).
# NOTE(review): assumes the type parameters are PooledDataArray{T, R, N}, with
# R the reference type as in `array{T, R}` — confirm against the type
# definition. With the old `PooledDataArray{T, N}` spelling, N was bound to
# both a dimension count and the reference type, so the method never matched.
function Base.convert{S, T, R, N}(::Type{DataArray{S, N}}, x::PooledDataArray{T, R, N})
    vals = Array(S, size(x))
    na = falses(size(x))
    for i in 1:length(x)
        ref = x.refs[i]
        if ref == zero(R)
            na[i] = true
        else
            # Assignment converts the pooled value from T to S.
            vals[i] = x.pool[ref]
        end
    end
    return DataArray(vals, na)
end

# Expand a PooledDataArray into a DataArray with the same element type.
#
# Every non-NA entry is looked up in the pool; entries whose reference is
# zero(R) are NA and are flagged in the NA mask instead (their data slot is
# left unset).
#
# The previous body returned a PooledDataArray rather than a DataArray and
# read `x.data`/`x.na`, whereas a PooledDataArray is stored as `refs` plus
# `pool` (see `array` in this file).
# NOTE(review): assumes the type parameters are PooledDataArray{T, R, N}, with
# R the reference type as in `array{T, R}` — confirm against the type
# definition.
function Base.convert{T, R, N}(::Type{DataArray}, x::PooledDataArray{T, R, N})
    vals = Array(T, size(x))
    na = falses(size(x))
    for i in 1:length(x)
        ref = x.refs[i]
        if ref == zero(R)
            na[i] = true
        else
            vals[i] = x.pool[ref]
        end
    end
    return DataArray(vals, na)
end

# Turn a PooledDataArray into an Array. Fail on NA
#
# Returns a new Array{T} of the same size as `da`, filled by looking each
# reference up in the pool. Throws NAException as soon as an NA entry
# (reference equal to zero(R)) is met.
function array{T, R}(da::PooledDataArray{T, R})
    res = Array(T, size(da))
    for i in 1:length(da)
        ref = da.refs[i]
        if ref == zero(R)
            # Raise the exception object directly; error() is meant for
            # message strings.
            throw(NAException())
        end
        res[i] = da.pool[ref]
    end
    return res
end

# Turn a PooledDataArray into an Array, substituting `replacement` for every
# NA entry (reference equal to zero(R)). All other entries are looked up in
# the pool. Returns a new Array{T} of the same size as `da`.
function array{T, R}(da::PooledDataArray{T, R}, replacement::T)
    out = Array(T, size(da))
    total = length(da)
    for k in 1:total
        out[k] = da.refs[k] == zero(R) ? replacement : da.pool[da.refs[k]]
    end
    return out
end
3 changes: 0 additions & 3 deletions test/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ module TestConstructors

dv = DataArray([1, 2, 3], falses(3))
@assert isequal(dv, DataArray(1:3))
@assert isequal(dv, DataArray(DataArray([1, 2, 3])))

dv = DataArray(Int, 3)
@assert isequal(eltype(dv), Int)
Expand Down Expand Up @@ -99,8 +98,6 @@ module TestConstructors

@assert isequal(dm, DataArray(trues(2, 2)))

@assert isequal(DataArray([1 2; 3 4]), DataArray(DataArray([1 2; 3 4])))

dm = DataArray(Int, 2, 2)
@assert isequal(eltype(dm), Int)
@assert isequal(dm.na, trues(2, 2))
Expand Down
8 changes: 2 additions & 6 deletions test/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,18 @@ module TestConversions

# Test array() and convert() conversion tools
dv = @data ones(5)
@assert isa(vector(dv), Vector{Float64})
@assert isa(array(dv), Vector{Float64})
@assert isa(convert(Vector{Float64}, dv), Vector{Float64})
dv[1] = NA
# Should raise errors:
# array(dv)
# convert(Vector{Float64}, dv)
@assert isa(vector(dv, Any), Vector{Any})
@assert isnan(vector(dv, Float64, NaN)[1])

dm = @data ones(3, 3)
@assert isa(matrix(dm), Matrix{Float64})
@assert isa(array(dm), Matrix{Float64})
@assert isa(convert(Matrix{Float64}, dm), Matrix{Float64})
dm[1, 1] = NA
# Should raise errors:
# array(dm)
# convert(Matrix{Float64}, dm)
@assert isa(matrix(dm, Any), Matrix{Any})
@assert isnan(matrix(dm, Float64, NaN)[1, 1])
end
12 changes: 5 additions & 7 deletions test/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ module TestData
@assert isa(dvstr, DataVector{ASCIIString})
# @test throws_exception(DataArray([5:8], falses(2)), Exception)

@assert isequal(DataArray(dvint), dvint)

#test_group("PooledDataVector creation")
pdvstr = @pdata ["one", "one", "two", "two", NA, "one", "one"]
@assert isa(pdvstr, PooledDataVector{ASCIIString})
Expand Down Expand Up @@ -94,27 +92,27 @@ module TestData

#test_group("DataVector to something else")
@assert all(removeNA(dvint) .== [1, 2, 4])
@assert all(replaceNA(dvint, 0) .== [1, 2, 0, 4])
@assert all(array(dvint, 0) .== [1, 2, 0, 4])
@assert all(convert(Vector{Int}, dvint2) .== [5:8])
@assert all([i + 1 for i in dvint2] .== [6:9])
@assert all([length(x)::Int for x in dvstr] == [3, 3, 1, 4])
@assert repr(dvint) == "[1,2,NA,4]"

#test_group("PooledDataVector to something else")
@assert all(removeNA(pdvstr) .== ["one", "one", "two", "two", "one", "one"])
@assert all(replaceNA(pdvstr, "nine") .== ["one", "one", "two", "two", "nine", "one", "one"])
@assert all(array(pdvstr, "nine") .== ["one", "one", "two", "two", "nine", "one", "one"])
@assert all([length(i)::Int for i in pdvstr] .== [3, 3, 3, 3, 1, 3, 3])
@assert string(pdvstr[1:3]) == "[one, one, two]"

#test_group("DataVector Filter and Replace")
@assert isequal(removeNA(dvint), [1, 2, 4])
@assert isequal(replaceNA(dvint, 7), [1, 2, 7, 4])
@assert isequal(array(dvint, 7), [1, 2, 7, 4])
@assert sum(removeNA(dvint)) == 7
@assert sum(replaceNA(dvint, 7)) == 14
@assert sum(array(dvint, 7)) == 14

#test_group("PooledDataVector Filter and Replace")
@assert reduce(string, "", removeNA(pdvstr)) == "oneonetwotwooneone"
@assert reduce(string, "", replaceNA(pdvstr,"!")) == "oneonetwotwo!oneone"
@assert reduce(string, "", array(pdvstr, "!")) == "oneonetwotwo!oneone"

#test_group("DataVector assignment")
assigntest = @data [1, 2, NA, 4]
Expand Down
Loading

0 comments on commit daeaae9

Please sign in to comment.