/
filter.rb
176 lines (154 loc) · 4.14 KB
/
filter.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
module BioTable
# LazyValues fetches values on demand from the @fields array. In the [] method
# a field is transformed into a float when it is called.
class LazyValues
  include Enumerable

  # fields:: the raw row; elements are converted to Float only when they are
  #          first requested through #[] (or iteration).
  def initialize(fields)
    @fields = fields
    # Cache keyed by the index exactly as the caller passed it. A Hash is used
    # (instead of an Array) so that a negative index cannot alias some other
    # cached slot, and so that a nil result is remembered as "already
    # computed" rather than being re-validated on every access.
    @values = {}
  end

  # Return the field at +index+ as a Float, or nil when the field is not a
  # valid number according to Filter::valid_number?. The result is memoized.
  def [](index)
    @values.fetch(index) do
      field = @fields[index]
      @values[index] = Filter::valid_number?(field) ? field.to_f : nil
    end
  end

  # Yield every converted value in field order. Without a block an Enumerator
  # is returned, which is the usual contract for Enumerable collections.
  # With a block the return value is the underlying fields collection.
  def each
    return enum_for(:each) unless block_given?

    @fields.each_with_index { |_field, i| yield self[i] }
  end

  # Return an Array of the converted values with the nil (non-numeric)
  # entries removed.
  def compact
    reject(&:nil?)
  end
end
module Filter
  # Create an index to the column headers, so header A,B,C,D with columns
  # C,A returns [2,0]. If +columns+ is already a list of integers (either
  # Integer objects or integer-looking strings) it is returned converted to
  # Integers. Returns nil when +columns+ is nil/false.
  #
  # Raises RuntimeError when a column name is not present in +header+.
  def self.create_column_index(columns, header)
    return nil unless columns

    # Already a list of numbers? (An empty list counts as one.)
    return columns.map { |v| v.to_i } if columns.all? { |v| valid_int?(v) }

    # Otherwise resolve every name through the header.
    columns.map do |name|
      pos = header.index(name)
      raise "Column name #{name} not found!" if pos.nil?
      pos
    end
  end

  # Filter on (indexed) column names, using an expression and return
  # a new index. A nil +index+ means "all columns of the header". Column 0 is
  # always kept and the expression is not evaluated for it.
  #
  # The expression is evaluated with Ruby's eval inside the block below, so it
  # can see the locals +colname+, +idx+, +header+, +index+ and +expression+;
  # these names must therefore not be renamed.
  # NOTE(review): eval runs arbitrary Ruby; presumably the expression comes
  # from the command line of a trusted user -- confirm before exposing it.
  def self.filter_column_index(index, header, expression)
    return index if !expression || expression == ""

    index = (0...header.size).to_a if index.nil?
    index.select do |idx|
      colname = header[idx] # referenced from inside the eval'ed expression
      idx == 0 || eval(expression)
    end
  end

  # Pick the +fields+ named by +index+ (in index order); without an index the
  # fields are returned unchanged.
  def self.apply_column_filter(fields, index)
    return fields unless index

    index.map { |idx| fields[idx] }
  end

  # True when +s+ is an integer in canonical form: an Integer object, or a
  # string that round-trips through to_i unchanged ("12", "-3"; not "1.5",
  # "+2", " 2" or "abc"). Comparing against s.to_s (rather than s itself) is
  # what lets Integer objects qualify, so that an already-built column index
  # is recognised by create_column_index.
  def self.valid_int?(s)
    s.to_i.to_s == s.to_s
  end

  # True when Kernel#Float accepts +s+; any StandardError raised by the
  # conversion (e.g. for nil or non-numeric text) means false.
  def self.valid_number?(s)
    # s.to_s.match(/\A[+-]?\d+?(\.\d+)?\Z/) == nil ? false : true
    Float(s)
    true
  rescue StandardError
    false
  end

  # Evaluate the numeric filter expression +code+ against +fields+.
  # A nil +code+ means "no filter" and passes (true); missing +fields+ fail
  # (false). Otherwise the result of NumericFilter#numeric is returned.
  def self.numeric(code, fields, header)
    return true if code.nil?
    return false unless fields

    NumericFilter.new(header).numeric(code, fields)
  end

  # Evaluate the textual filter expression +code+ against +fields+.
  # A nil +code+ means "no filter" and passes (true); missing +fields+ fail
  # (false). Otherwise the result of TextualFilter#textual is returned.
  def self.generic(code, fields, header)
    return true if code.nil?
    return false unless fields

    TextualFilter.new(header).textual(code, fields)
  end
end
# FIXME: we should have a faster version too
class TextualFilter
  # header:: list of column names, or nil when the table has no header.
  #          Names are stored downcased so a column can be referenced as a
  #          bare lower-case identifier inside a filter expression.
  def initialize(header)
    # A nil header is tolerated; method_missing below already distinguishes
    # the "no header" case.
    @header = header ? header.map { |name| name.downcase } : nil
  end

  # Evaluate the Ruby expression +code+ against one row and return its
  # result. Inside the expression the row is available as +field+ / +fields+
  # (a copy of +tablefields+), and every header name that is not already a
  # method or local resolves to that column's value through method_missing.
  #
  # On any failure a diagnostic is written to $stderr and the exception is
  # re-raised unchanged.
  # NOTE(review): eval runs arbitrary Ruby; presumably +code+ comes from a
  # trusted user -- confirm before exposing it.
  def textual(code, tablefields)
    # These locals look unused but are part of the expression's vocabulary:
    # eval sees the method's local scope, so the names must stay as they are.
    field = tablefields.dup
    fields = field # alias
    @fields = fields
    begin
      eval(code)
    rescue Exception
      # Exception (not only StandardError) so that a SyntaxError raised by
      # eval is reported too; nothing is swallowed because of the re-raise.
      $stderr.print "Failed to evaluate ", fields, " with ", code, "\n"
      raise
    end
  end

  # Resolve an otherwise undefined method name as a column name and return
  # that column's field from the row currently being evaluated.
  # Raises RuntimeError for names that match no column.
  def method_missing(m, *args, &block)
    if @header
      i = @header.index(m.to_s)
      return @fields[i] unless i.nil?
      raise "Unknown field (can not find column name '#{m}') in list '#{@header}'"
    end
    raise "Unknown method '#{m}'"
  end

  # Keep respond_to? consistent with method_missing: column names are
  # answered, everything else is left to the default lookup.
  def respond_to_missing?(m, include_private = false)
    (!@header.nil? && @header.include?(m.to_s)) || super
  end
end
# FIXME: we should have a faster version too
class NumericFilter
  # header:: list of column names, or nil when the table has no header.
  #          Names are stored downcased so a column can be referenced as a
  #          bare lower-case identifier inside a filter expression.
  def initialize(header)
    # A nil header is tolerated; method_missing below already distinguishes
    # the "no header" case.
    @header = header ? header.map { |name| name.downcase } : nil
  end

  # Evaluate the Ruby expression +code+ against one row and return its
  # result. +fields+ is wrapped in a LazyValues, which the expression can
  # reach as +value+ / +values+; every header name that is not already a
  # method or local resolves to that column's entry through method_missing.
  #
  # On any failure a diagnostic is written to $stderr and the exception is
  # re-raised unchanged.
  # NOTE(review): eval runs arbitrary Ruby; presumably +code+ comes from a
  # trusted user -- confirm before exposing it.
  def numeric(code, fields)
    # These locals look unused but are part of the expression's vocabulary:
    # eval sees the method's local scope, so the names must stay as they are.
    values = LazyValues.new(fields)
    value = values # alias
    @values = values
    begin
      eval(code)
    rescue Exception
      # Exception (not only StandardError) so that a SyntaxError raised by
      # eval is reported too; nothing is swallowed because of the re-raise.
      $stderr.print "Failed to evaluate ", fields, " with ", code, "\n"
      raise
    end
  end

  # Resolve an otherwise undefined method name as a column name and return
  # that column's entry from the LazyValues of the row being evaluated.
  # Raises RuntimeError for names that match no column.
  def method_missing(m, *args, &block)
    if @header
      i = @header.index(m.to_s)
      return @values[i] unless i.nil?
      raise "Unknown value (can not find column name '#{m}') in list '#{@header}'"
    end
    raise "Unknown method '#{m}'"
  end

  # Keep respond_to? consistent with method_missing: column names are
  # answered, everything else is left to the default lookup.
  def respond_to_missing?(m, include_private = false)
    (!@header.nil? && @header.include?(m.to_s)) || super
  end
end
end