/
struct.jl
234 lines (195 loc) · 7.61 KB
/
struct.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# Abstract supertype for experiments; the filter and selector methods in this
# file dispatch on it.
abstract type AbstractExperiment end
# Stores the details of an experiment: the underlying data table plus the
# current selection of rows (entries) and columns (features).
mutable struct Experiment <: AbstractExperiment
# Full data table; the two selections below index into it
data::DataFrame
# Free-text description of the experiment
description::String
# Column indices of `data` that are currently selected
selected_features::Array{Int64,1}
# Row indices of `data` that are currently selected
selected_entries::Array{Int64,1}
end
# Convenience constructor: starts with every feature (column) and every entry
# (row) of `data` selected.
# WARNING: `data` is passed through as-is, not copied, so transforming the
# Experiment's data also modifies the `data` DataFrame given to this constructor.
function Experiment(data; description = "No description provided")
    all_features = 1:ncol(data)
    all_entries = 1:nrow(data)
    return Experiment(data, description, all_features, all_entries)
end
# Display an Experiment as "<selected>/<total>" counts for entries and features.
# When the IO context has `:compact => true`, an abbreviated one-token form is used.
# The text is written with `print`, not `show`: `show` on a String wraps it in
# double quotes and escapes its content, which is not wanted in a display method.
function Base.show(io::IO, e::Experiment)
    n_entries = length(e.selected_entries)
    n_features = length(e.selected_features)
    if get(io, :compact, false)
        print(io, "Exp.: ", n_entries, "/", nrow(e.data),
              "-", n_features, "/", ncol(e.data))
    else
        print(io, "Experiment with ", n_entries, "/", nrow(e.data),
              " entries and ", n_features, "/", ncol(e.data),
              " features selected.")
    end
    return nothing
end
# Type hierarchy for entry filters.
# `AbstractReduce` is the common supertype of filters and selectors; arrays of
# its subtypes are accepted by `filter_experiment!`.
abstract type AbstractReduce end
# Supertype of all entry (row) filters
abstract type AbstractFilter <: AbstractReduce end
# Filter that compares one feature against one value
abstract type AbstractSimpleFilter <: AbstractFilter end
# Filter that combines the results of two other filters
abstract type AbstractCombinationFilter <: AbstractFilter end
# Filter that handles missing values
abstract type AbstractMissingFilter <: AbstractFilter end
# Simple entry filter: keeps the entries for which
# `compare(entry_value, value)` is true in column `feature`.
mutable struct Filter <: AbstractSimpleFilter
# Reference value handed to `compare` as its second argument
value::Any
# Name of the column of the experiment's data to test
feature::Symbol
# Two-argument predicate, called as `compare(entry_value, value)`
compare::Function
# Free-text description of the filter
description::String
end
# Keyword constructor: `compare` defaults to `isequal` and `description` to a
# placeholder text; everything is forwarded to the positional constructor.
Filter(value, feature; compare = isequal, description = "No description provided") =
    Filter(value, feature, compare, description)
# Filter built from two other filters. `filter_entries` evaluates both filters
# and merges the two index vectors with `operator`.
mutable struct CombinationFilter <: AbstractCombinationFilter
# First filter to evaluate
filter1::AbstractFilter
# Second filter to evaluate
filter2::AbstractFilter
# Called as `operator(entries1, entries2)` on the two vectors of kept indices
operator::Function
end
# Field-less filter. `filter_entries` on an instance keeps the entries that have
# no missing value in any of the selected features.
mutable struct MissingFilter <: AbstractMissingFilter end
# Constructor for a single-feature missing-value filter.
# NB: this returns a `Filter` (not a `MissingFilter`) whose comparison ignores
# the stored value and keeps an entry when it is not `missing`.
function MissingFilter(feature; description = "No description provided")
    placeholder = 1  # never read by the comparison below
    notmissing = (entry, _) -> !ismissing(entry)
    return Filter(placeholder, feature, notmissing, description)
end
# Methods
"""
Return the subset of `e.selected_entries` whose value in column `f.feature`
satisfies `f.compare` against `f.value`. The experiment `e` is not modified.
"""
function filter_entries(e::AbstractExperiment, f::AbstractSimpleFilter)
    candidates = e.selected_entries
    column = e.data[candidates, f.feature]
    # Broadcast the predicate over the column to get a keep/drop mask
    keep = f.compare.(column, f.value)
    return candidates[keep]
end
"""
Return the sorted result of combining the entries kept by `f.filter1` and
`f.filter2` with `f.operator`. The experiment `e` is not modified.
"""
function filter_entries(e::AbstractExperiment, f::AbstractCombinationFilter)
    kept_by_first = filter_entries(e, f.filter1)
    kept_by_second = filter_entries(e, f.filter2)
    combined = f.operator(kept_by_first, kept_by_second)
    return sort(combined)
end
"""
Return the sorted entries of `e` that have no missing value in any of the
selected features. The experiment `e` is not modified.

Each selected feature is checked with a single-feature `MissingFilter(name)` and
the per-feature results are intersected. When no feature is selected there is
nothing to check, so all currently selected entries are returned.
"""
function filter_entries(e::AbstractExperiment, f::AbstractMissingFilter)
    feature_names = Symbol.(names(e.data)[e.selected_features])
    # Fold instead of splatting into `∩`: the `init` makes the empty case well
    # defined and does not change the result otherwise, because every
    # per-feature result is already a subset of `e.selected_entries`.
    kept = mapreduce(name -> filter_entries(e, MissingFilter(name)), intersect,
                     feature_names; init = e.selected_entries)
    return sort(kept)
end
"""Filter entries in an Experiment `e` based on filter(s) `f`,
updating `e.selected_entries` in place accordingly.
"""
function filter_entries!(e::AbstractExperiment, f::AbstractFilter)
    kept = filter_entries(e, f)
    e.selected_entries = kept
    # The indices kept are currently also the return value
    return kept
end
"""
Apply every filter in `filters` to `e` one after the other, updating
`e.selected_entries` in place at each step. Returns `nothing`.
"""
function filter_entries!(
    e::AbstractExperiment, filters::Array{T,1}
) where {T<:AbstractFilter}
    foreach(f -> filter_entries!(e, f), filters)
    return nothing
end
# Type hierarchy for feature selectors.
# NOTE(review): `AbstractReduce` is already declared earlier in this file, next
# to the filter types; this repeated declaration looks redundant.
abstract type AbstractReduce end
# Supertype of all feature (column) selectors
abstract type AbstractSelector <: AbstractReduce end
# Selector that summarizes the values of each feature
abstract type AbstractSimpleSelector <: AbstractSelector end
# Selector that works on the feature names
abstract type AbstractNameSelector <: AbstractSelector end
# Selector that combines the results of two other selectors
abstract type AbstractCombinationSelector <: AbstractSelector end
# Feature selector based on the values of each feature.
mutable struct Selector <: AbstractSimpleSelector
# Applied to every selected column (through `mapcols`); its result decides
# whether the feature is kept
summarize::Function
# Optional function called on the data restricted to the selected entries;
# its result is used to index `selected_entries` before summarizing.
# `nothing` means all selected entries are used.
subset::Union{Function, Nothing}
# Free-text description of the selector
description::String
end
# Keyword constructor: `subset` defaults to `nothing` and `description` to a
# placeholder text; everything is forwarded to the positional constructor.
Selector(summarize; subset = nothing, description = "No description provided") =
    Selector(summarize, subset, description)
# Feature selector based on the feature (column) names.
mutable struct NameSelector <: AbstractNameSelector
# Applied to each selected column name (through `map`); its result decides
# whether the feature is kept
summarize::Function
# Free-text description of the selector
description::String
end
# Keyword constructor: `description` defaults to a placeholder text.
NameSelector(summarize; description = "No description provided") =
    NameSelector(summarize, description)
# Selector built from two other selectors. `select_features` evaluates both
# selectors and merges the two index vectors with `operator`.
mutable struct CombinationSelector <: AbstractCombinationSelector
# First selector to evaluate
selector1::AbstractSelector
# Second selector to evaluate
selector2::AbstractSelector
# Called as `operator(features1, features2)` on the two vectors of kept indices
operator::Function
end
# Methods
"""
Return the subset of `e.selected_features` for which `s.summarize`, applied to
each selected column, yields a keep flag. When `s.subset` is set, only the
entries it picks among the selected ones are summarized. `e` is not modified.
"""
function select_features(e::AbstractExperiment, s::AbstractSimpleSelector)
    # NB: isnothing was not implemented in 1.0
    rows = if s.subset === nothing
        e.selected_entries
    else
        e.selected_entries[s.subset(e.data[e.selected_entries, :])]
    end
    columns = e.data[rows, e.selected_features]
    summary = mapcols(s.summarize, columns)
    # The first row of the summary holds one flag per selected feature
    keep = [flag for flag in summary[1, :]]
    return e.selected_features[keep]
end
"""
Return the subset of `e.selected_features` whose column name is flagged by
`s.summarize`. The experiment `e` is not modified.
"""
function select_features(e::AbstractExperiment, s::AbstractNameSelector)
    # Index the name vector directly: slicing the DataFrame first would copy
    # every row of the selected columns only to read their names.
    selected_names = names(e.data)[e.selected_features]
    keep = map(s.summarize, selected_names)
    return e.selected_features[keep]
end
"""
Return the sorted result of combining the features kept by `s.selector1` and
`s.selector2` with `s.operator`. The experiment `e` is not modified.
"""
function select_features(e::AbstractExperiment, s::AbstractCombinationSelector)
    kept_by_first = select_features(e, s.selector1)
    kept_by_second = select_features(e, s.selector2)
    combined = s.operator(kept_by_first, kept_by_second)
    return sort(combined)
end
"""Return selected features in an Experiment `e` based on selectors `s`,
updating `e.selected_features` in place accordingly.
"""
function select_features!(e::AbstractExperiment, s::AbstractSelector)
    kept = select_features(e, s)
    e.selected_features = kept
    # The indices kept are currently also the return value
    return kept
end
"""
Apply every selector in `selectors` to `e` one after the other, updating
`e.selected_features` in place at each step. Returns `nothing`.
"""
function select_features!(
    e::AbstractExperiment, selectors::Array{T,1}
) where {T<:AbstractSelector}
    foreach(s -> select_features!(e, s), selectors)
    return nothing
end
# Not exported in favor of the shorter "filter!" and "select!"
"""
For an experiment `e`, update `e.selected_features` and `e.selected_entries` in
place from an array `arr` of feature selectors and entry filters. The elements
of `arr` are applied one after the other, in order. Returns `nothing`.
"""
function filter_experiment!(
    e::AbstractExperiment, arr::Array{T,1}
) where {T<:AbstractReduce}
    # `filter!` dispatches each element to the selector or the filter method
    foreach(a -> filter!(e, a), arr)
    return nothing
end
"""For an experiment `e`, update in place `e.selected_features` and
`e.selected_entries` based on a feature selector `s`.
"""
filter_experiment!(e::AbstractExperiment, s::AbstractSelector) = select_features!(e, s)
"""For an experiment `e`, update in place `e.selected_features` and
`e.selected_entries` based on an entry filter `f`.
"""
function filter_experiment!(e::AbstractExperiment, f::AbstractFilter)
    # Entry filters are handled by `filter_entries!`
    return filter_entries!(e, f)
end
# Aliases
# `Base.filter!` on an experiment forwards to `filter_experiment!` for an array
# of filters/selectors, for a single selector and for a single filter.
# The `@doc (@doc filter_experiment!)` line reuses the documentation of
# `filter_experiment!` for the definition that directly follows it.
@doc (@doc filter_experiment!)
Base.filter!(e::AbstractExperiment,
arr::Array{T,1}) where {T<:AbstractReduce} = filter_experiment!(e,arr)
Base.filter!(e::AbstractExperiment, s::AbstractSelector) = filter_experiment!(e,s)
Base.filter!(e::AbstractExperiment, f::AbstractFilter) = filter_experiment!(e,f)
# `DataFrames.select!` on an experiment forwards to `filter_experiment!` for an
# array of filters/selectors, for a single selector and for a single filter.
# The `@doc (@doc filter_experiment!)` line reuses the documentation of
# `filter_experiment!` for the definition that directly follows it.
@doc (@doc filter_experiment!)
DataFrames.select!(e::AbstractExperiment,
arr::Array{T,1}) where {T<:AbstractReduce} = filter_experiment!(e,arr)
DataFrames.select!(e::AbstractExperiment, s::AbstractSelector) = filter_experiment!(e,s)
DataFrames.select!(e::AbstractExperiment, f::AbstractFilter) = filter_experiment!(e,f)
"""Return a negative Filter or Selector by inverting
the entries or features that are kept and excluded.
"""
function negation(r::Union{AbstractNameSelector,AbstractSimpleSelector})
    # Work on a deep copy so the selector passed in is left untouched
    inverted = deepcopy(r)
    # `!` applied to a function gives the function returning the negated result
    inverted.summarize = !(inverted.summarize)
    inverted.description = string("Do not ", inverted.description)
    return inverted
end
"""
Return a copy of filter `r` whose comparison is negated, so that the entries
kept and excluded are swapped. The description is prefixed with "Do not ".
"""
function negation(r::AbstractSimpleFilter)
    # Work on a deep copy so the filter passed in is left untouched
    inverted = deepcopy(r)
    # `!` applied to a function gives the function returning the negated result
    inverted.compare = !(inverted.compare)
    inverted.description = string("Do not ", inverted.description)
    return inverted
end
"""Return a copy of the data in Experiment `e` for its
selected entries and features.
"""
function getdata(e::Experiment)
    rows, cols = e.selected_entries, e.selected_features
    # Index the data by the selected rows and columns
    return e.data[rows, cols]
end