Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/more broadcasting #28

Merged
merged 5 commits into from
Oct 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ version = "0.19.4"

[[DataStructures]]
deps = ["InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "f94423c68f2e47db0d6f626a26d4872266e0ec3d"
git-tree-sha1 = "2103e504f427e54ffa19af9ada225733a21f951f"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.17.2"
version = "0.17.3"

[[DataValueInterfaces]]
git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "RLEVectors"
uuid = "17b45ede-fd0d-54ef-b825-8cf9fc64da95"
version = "0.9.0"
version = "0.9.1"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Expand Down
2 changes: 1 addition & 1 deletion src/RLEDataFrame-type.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ end
function Base.getindex(x::RLEDataFrame, i, j)
ind = index(x)
j_inds = [ ind[x] for x in j ]
cols = [ RLEVector(x.columns[j_ind][i]) for j_ind in j_inds ] # FIXME: converting to RLEVector should not be necessary
cols = [ x.columns[j_ind][i] for j_ind in j_inds ]
RLEDataFrame( cols, names(x)[j_inds] )
end
# Scalar lookup: select column `j` of the RLEDataFrame, then take element `i` of it.
function Base.getindex(x::RLEDataFrame, i::Integer, j::ColumnIndex)
    column = x[j]
    return column[i]
end
Expand Down
4 changes: 2 additions & 2 deletions src/RLEVector-type.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ const RLEVectorList{T1,T2} = Vector{ RLEVector{T1,T2} }
# Build a new RLEVector from copies of both backing arrays (`runvalues` and `runends`).
function Base.copy(x::RLEVector)
    vals = copy(x.runvalues)
    stops = copy(x.runends)
    return RLEVector(vals, stops)
end

# similar
function Base.similar(x::RLEVector)
copy(x)
# `similar` for an explicit element type `T` and a one-dimensional size.
# The result holds a single uninitialized run value and a single run end equal
# to `dims[1]`; the run-end type parameter `T2` is carried over from `a`.
function Base.similar(a::RLEVector{T1,T2}, ::Type{T}, dims::Tuple{Int}) where {T1,T2,T}
    placeholder = Vector{T}(undef, 1)
    stops = [dims[1]]
    return RLEVector{T,T2}(placeholder, stops)
end

# show
Expand Down
2 changes: 2 additions & 0 deletions src/RLEVectors.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module RLEVectors

using Base.Broadcast
using Requires
using Statistics
using StatsBase
Expand All @@ -18,6 +19,7 @@ export eltype, vcat, pop!, push!, popfirst!, pushfirst!, insert!, deleteat
export deleterun!, decrement_run!

# indexing
import Base.Broadcast: BroadcastStyle, Broadcasted
import Base: getindex, setindex!
import Base: iterate
export getindex, setindex!, ind2run, setrun!, ind2runcontext, RLERangesIterator, eachrange, tapply, iterate
Expand Down
20 changes: 16 additions & 4 deletions src/group_generics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,23 @@ for op in summary_group
end
end

Base.broadcast(f, x::RLEVector, y...) = RLEVector( [f(el,y...) for el in x.runvalues], ends(x) )
function Base.broadcast(f, x::RLEVector, y::RLEVector)
(runends, runvalues_x, runvalues_y) = disjoin(x, y)
RLEVector( map(f,runvalues_x,runvalues_y), runends )
# Declare a dedicated array broadcast style for RLEVector so that broadcast
# results are allocated through the `similar` method for this style.
function Base.BroadcastStyle(::Type{<:RLEVector})
    return Broadcast.ArrayStyle{RLEVector}()
end
# Allocate the destination for an RLEVector-style broadcast: one uninitialized
# run value of type `ElType` and a single run end equal to the first entry of `size(bc)`.
function Base.similar(bc::Broadcast.Broadcasted{Broadcast.ArrayStyle{RLEVector}}, ::Type{ElType}) where ElType
    placeholder = Vector{ElType}(undef, 1)
    total = size(bc)[1]
    return RLEVector(placeholder, [total])
end
# Materialize the broadcast `bc` into `dest`, one element at a time.
# Calls `Broadcast.throwdm` when the axes of `dest` and `bc` differ.
# Returns `dest`.
function Base.copyto!(dest::RLEVector, bc::Broadcast.Broadcasted{Nothing})
    axes(dest) == axes(bc) || Broadcast.throwdm(axes(dest), axes(bc))
    res = Broadcast.preprocess(dest, bc)
    # Iterating `res` already evaluates each element, so store that value directly
    # instead of indexing `res` a second time; the broadcast function then runs
    # once per element.
    for (i, val) in enumerate(res)
        dest[i] = val
    end
    return dest
end
#Base.broadcast(f, x::RLEVector, y...) = RLEVector( [f(el,y...) for el in x.runvalues], ends(x) )
#function Base.broadcast(f, x::RLEVector, y::RLEVector)
# (runends, runvalues_x, runvalues_y) = disjoin(x, y)
# RLEVector( map(f,runvalues_x,runvalues_y), runends )
#end
# Apply `f` once per run value (not once per element) and pair the results
# with `ends(x)` to form a new RLEVector.
function Base.map(f, x::RLEVector)
    mapped = map(f, x.runvalues)
    return RLEVector(mapped, ends(x))
end

## Methods that take two arguments, delegate to rle.runvalues and return something other than an RLEVector
Expand Down
184 changes: 83 additions & 101 deletions src/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,107 +125,89 @@ function Base.setindex!(rle::RLEVector, value, i::Integer)
end

## Indexing optimizations
# function Base.getindex(rle::RLEVector, i::Colon) # FIXME: delete?
# copy(rle)
# end
#
# # FIXME: Bool methods can go if methods for ind == abstract array are tightened to AE of Integer
# function Base.getindex(x::RLEVector, ind::Array{Bool, 1}) # FIXME: delete?
# x[ findall(ind) ]
# end
#
# function Base.setindex!(x::RLEVector, value::AbstractArray, ind::Array{Bool, 1}) # FIXME: delete?
# x[ findall(ind) ] = value
# x
# end
#
# function Base.getindex(x::RLEVector, indices::UnitRange)
# (left_run, right_run, index_in_left_run, run_remainder_right) = ind2runcontext(x,indices)
# n_run = (right_run - left_run) + 1
# f = first(indices)
# v = similar(x.runvalues, n_run)
# e = similar(x.runends, n_run)
# offset = f - 1
# i = 1
# @inbounds while left_run <= right_run
# v[i] = x.runvalues[left_run]
# e[i] = x.runends[left_run] - offset
# left_run = left_run + 1
# i = i + 1
# end
# e[end] = e[end] - run_remainder_right
# RLEVector{eltype(x),endtype(x)}(v, e)
# end
#
# function Base.getindex(x::RLEVector, i::AbstractVector)
# run_indices = ind2run(x, i)
# RLEVector( x.runvalues[ run_indices ] )
# end
#
# function Base.setindex!(x::RLEVector, value::AbstractVector, indices::UnitRange)
# setindex!(x, RLEVector(value), indices)
# end
#
# #function Base.setindex!(x::RLEVector, value, indices::UnitRange)
# # setindex!(x, RLEVector(value, length(indices)), indices)
# #end
#
# function Base.setindex!(x::RLEVector, value::RLEVector, indices::UnitRange)
# length(value) != length(indices) && throw(BoundsError())
# i_left = first(indices)
# i_right = last(indices)
# if i_left == i_right
# return(setindex!(x,ins,i_left))
# end
# nrun_x = nrun(x)
# nrun_value = nrun(value)
# (run_left, run_right, index_in_run_left, run_remainder_right) = ind2runcontext(x,indices)
# # Move run markers to denote parts of original data that will be kept, accomodating completely filled runs or adjacent matches
# # We will keep 1:run_left and run_right:end and fill in the middle with value
# # FIXME: factor out these two expressions for something like ind2insertcontext
# fix_partial_run_left = false
# if x.runvalues[run_left] == first(value)
# run_left = run_left - 1
# elseif index_in_run_left == 1
# run_left = run_left - 1
# if run_left > 0 && first(value) == x.runvalues[run_left]
# run_left = run_left - 1
# end
# else
# fix_partial_run_left = true
# end
# if x.runvalues[run_right] == last(value)
# nrun_value = nrun_value - 1
# elseif run_remainder_right == 0
# run_right = run_right + 1
# if run_right <= nrun_x && last(value) == x.runvalues[run_right]
# nrun_value = nrun_value - 1
# end
# end
# nrun_out = run_left + nrun_value + ((nrun_x - run_right) + 1)
# nrun_diff = nrun_out - nrun_x
# # Resize and move
# if nrun_diff > 0
# growat!(x, run_right, nrun_diff)
# elseif nrun_diff < 0
# delete_range = (run_left + 1):(run_left - nrun_diff)
# deleteat!(x.runvalues, delete_range)
# deleteat!(x.runends, delete_range)
# end
# if fix_partial_run_left
# x.runends[run_left] = i_left - 1
# end
# # Insert incoming values
# value_runvalues = value.runvalues
# value_runends = value.runends
# bump = i_left - 1
# @inbounds for i in 1:nrun_value
# il = i + run_left
# x.runvalues[il] = value_runvalues[i]
# x.runends[il] = value_runends[i] + bump
# end
# x
# end
# Slice an RLEVector by a contiguous range without expanding it.
# Copies the runs from the left run through the right run reported by
# `ind2runcontext`, shifts their ends so the slice starts at position 1, and
# trims the last end by the remainder reported for the right run.
# NOTE(review): the meaning of the tuple returned by `ind2runcontext` is taken
# from the local names used here; confirm against its definition.
function Base.getindex(x::RLEVector, indices::UnitRange)
    (run_lo, run_hi, _, tail_remainder) = ind2runcontext(x, indices)
    nruns = (run_hi - run_lo) + 1
    shift = first(indices) - 1
    vals = similar(x.runvalues, nruns)
    stops = similar(x.runends, nruns)
    @inbounds for k in 1:nruns
        src = run_lo + k - 1
        vals[k] = x.runvalues[src]
        stops[k] = x.runends[src] - shift
    end
    stops[end] = stops[end] - tail_remainder
    return RLEVector{eltype(x),endtype(x)}(vals, stops)
end

# Index with a vector of `Int` positions: map the positions through `ind2run`,
# pick the corresponding run values, and build a new RLEVector from them.
# The element type is restricted to `Int`, so `Bool` masks do not dispatch here.
Base.getindex(x::RLEVector, i::AbstractVector{<:Int}) =
    RLEVector(x.runvalues[ind2run(x, i)])

# Range assignment from a plain vector: wrap `value` in an RLEVector and
# forward to `setindex!` with the same range.
Base.setindex!(x::RLEVector, value::AbstractVector, indices::UnitRange) =
    setindex!(x, RLEVector(value), indices)

# Replace the elements of `x` at positions `indices` with the contents of the
# RLEVector `value`, editing the run arrays of `x` in place.
# Throws `BoundsError` when `value` and `indices` differ in length.
# Returns `x` (the single-position case returns whatever the scalar method returns).
function Base.setindex!(x::RLEVector, value::RLEVector, indices::UnitRange)
    length(value) != length(indices) && throw(BoundsError())
    i_left = first(indices)
    i_right = last(indices)
    if i_left == i_right
        # One-position range: the lengths match, so `value` holds exactly one
        # element; hand that element to the scalar setindex! method.
        return setindex!(x, first(value), i_left)
    end
    nrun_x = nrun(x)
    nrun_value = nrun(value)
    (run_left, run_right, index_in_run_left, run_remainder_right) = ind2runcontext(x, indices)
    # Move run markers to denote parts of original data that will be kept,
    # accommodating completely filled runs or adjacent matches.
    # We will keep 1:run_left and run_right:end and fill in the middle with value.
    # FIXME: factor out these two expressions for something like ind2insertcontext
    fix_partial_run_left = false
    if x.runvalues[run_left] == first(value)
        # Left run already carries the incoming first value: let it be overwritten/merged.
        run_left = run_left - 1
    elseif index_in_run_left == 1
        # Range starts at the beginning of the left run, so that run is not kept.
        run_left = run_left - 1
        if run_left > 0 && first(value) == x.runvalues[run_left]
            # The preceding run matches the incoming first value: merge with it too.
            run_left = run_left - 1
        end
    else
        # Range starts inside the left run: its end must be shortened later.
        fix_partial_run_left = true
    end
    if x.runvalues[run_right] == last(value)
        # Right run already carries the incoming last value: one fewer run to insert.
        nrun_value = nrun_value - 1
    elseif run_remainder_right == 0
        # Range ends exactly at the end of the right run, so that run is not kept.
        run_right = run_right + 1
        if run_right <= nrun_x && last(value) == x.runvalues[run_right]
            nrun_value = nrun_value - 1
        end
    end
    nrun_out = run_left + nrun_value + ((nrun_x - run_right) + 1)
    nrun_diff = nrun_out - nrun_x
    # Resize and move
    if nrun_diff > 0
        growat!(x, run_right, nrun_diff)
    elseif nrun_diff < 0
        delete_range = (run_left + 1):(run_left - nrun_diff)
        deleteat!(x.runvalues, delete_range)
        deleteat!(x.runends, delete_range)
    end
    if fix_partial_run_left
        x.runends[run_left] = i_left - 1
    end
    # Insert incoming values, shifting their run ends to positions within `x`
    value_runvalues = value.runvalues
    value_runends = value.runends
    bump = i_left - 1
    @inbounds for i in 1:nrun_value
        il = i + run_left
        x.runvalues[il] = value_runvalues[i]
        x.runends[il] = value_runends[i] + bump
    end
    x
end

## Iterators
# Iterator for ranges based on RLE e.g. (value, start:end)
Expand Down
26 changes: 13 additions & 13 deletions test/test_indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ x[3] = 1
#
# # just left, no match
# x = RLEVector([1,2,3,4],[4,8,12,16])
# x[9:10] = 5
# x[9:10] .= 5
# @test x.runvalues == [1,2,5,3,4]
# @test x.runends == [4,8,10,12,16]
#
Expand Down Expand Up @@ -170,7 +170,7 @@ x[3] = 1
# @test collect(x) == [1,5,5,5,3,3,4,4]
# @test x.runvalues == [1,5,3,4]
# @test x.runends == [1,4,6,8]

#
## range with vector
x = RLEVector([1,2,3,4],[2,4,6,8])
x[2:4] = [5,6,7]
Expand Down Expand Up @@ -216,19 +216,19 @@ x[4:-1:2] = [5,6,7]
@test x[4:-1:2] == [5,6,7]

# Colon
# x = RLEVector([1,2,3,4],[2,4,6,8])
# @test x[:] == x
# x[:] .= 4
# @test x == RLEVector([4 for i in 1:8])
x = RLEVector([1,2,3,4],[2,4,6,8])
@test x[:] == x
#x[:] .= 4
#@test x == RLEVector([4 for i in 1:8])

# Logical
# x = RLEVector([1,2],[2,4])
# @test x[ [ true,true,true,false ] ] == x[ [1,2,3] ]
# x[ [true,true,true,false] ] .= 4
# @test x == RLEVector([4,4,4,2])
# x = RLEVector([1,2],[2,4])
# x[ [true,true,true,false] ] = [4,5,6]
# @test x == RLEVector([4,5,6,2])
x = RLEVector([1,2],[2,4])
@test x[ [ true,true,true,false ] ] == RLEVector([1,2],[2,3])
#x[ [true,true,true,false] ] .= 4
#@test x == RLEVector([4,4,4,2])
x = RLEVector([1,2],[2,4])
x[ [true,true,true,false] ] = [4,5,6]
@test x == RLEVector([4,5,6,2])

# eachrange iterator
x = RLEVector([1, 1, 2, 2, 7, 12])
Expand Down
2 changes: 1 addition & 1 deletion test/test_rledataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ z = RLEDataFrame( a=RLEVector([5,2,2]), b=RLEVector([4,4,4]), c=RLEVector([3,2,1
@test_throws ArgumentError RLEDataFrame( [RLEVector([1])], [:a,:b] )
@test_throws ArgumentError RLEDataFrame( [RLEVector([1]), RLEVector([2,3])], [:a,:b] )

# Getting and setting
# Getting and setting
z = RLEDataFrame( a=RLEVector([5,2,2]), b=RLEVector([4,4,4]), c=RLEVector([3,2,1]) )
@test z[:] == z
@test z[2] == RLEVector([4,4,4])
Expand Down
4 changes: 4 additions & 0 deletions test/test_types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ y = RLEVector([4,4,5,5,6,6])

@test RLEVector(5,3) == RLEVector([5,5,5])

# `similar` with an explicit element type and a one-element dims tuple.
# `(9,)` is a tuple (plain `(9)` is just the integer 9), so this calls the
# `dims::Tuple{Int}` method directly.
sim = similar(RLEVector([1,2,3],[2,4,6]), Int32, (9,))
@test length(sim) == 9
@test isa(values(sim), Vector{Int32})

# Conversion
x = RLEVector([4,4,5,5,6,7,8])
@test convert(Vector,x) == [4,4,5,5,6,7,8]
Expand Down