-
Notifications
You must be signed in to change notification settings - Fork 0
/
sequel_vectorized.rb
97 lines (67 loc) · 2.39 KB
/
sequel_vectorized.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
require 'narray'
require 'gsl'
class Sequel::Dataset
  # Loads the dataset into a hash of column => values, converting numeric and
  # boolean columns to float NArrays and adding a reader method per column on
  # the returned hash (so +result.foo+ is the same as +result[:foo]+).
  #
  # When +options[:axis]+ is given and rows were found, the data is further
  # resampled onto a regular grid by +_process+.
  #
  # @param options [Hash] optional settings; only +:axis+ is read here.
  #   +:axis+ is a hash with the keys +:column+, +:range+, +:step+ and
  #   +:interpolate+ (the last two are only used by +_process+).
  # @return [Hash] column name => Array or NArray
  def vectorize(options = {})
    # Read the axis spec without writing a default back into the caller's hash.
    axis = options[:axis] || {}

    # Only restrict the query when an axis was actually requested; without one
    # there is no column/range to filter on.
    dataset = axis.empty? ? self : filter(axis[:column] => axis[:range])

    # transform dataset to hash of arrays
    result = {}
    dataset.each do |row|
      row.each { |att, value| (result[att] ||= []) << value }
    end

    # transform numeric and boolean arrays to narrays
    # (the type of a column is decided by its first value only)
    result.each do |k, v|
      first = v.first
      if first.kind_of?(Numeric)
        v[0] = first.to_f # so NArray is always of type float
        result[k] = NArray.to_na v
      elsif first == true || first == false
        result[k] = NArray.float(v.size)
        result[k][] = v.map { |i| (i == true) ? 1 : 0 }
      end
    end

    # add dot notation; define_singleton_method avoids evaluating a string
    # built from column names, so unusual column names cannot break parsing
    columns.each do |col|
      result.define_singleton_method(col) { self[col] }
    end

    return result if axis.empty? || result.empty?

    _process(result, axis)
  end

  private

  # Resamples +data+ onto a regular axis built from +axis[:range]+ and
  # +axis[:step]+.
  #
  # The original axis values are kept under +:__<column>+, the original values
  # of every float column under +:__<key>+, and +:__raw_mask+ is a byte NArray
  # set to 1 at the grid positions computed from the raw axis values.
  # Float columns are either linearly interpolated with GSL (when
  # +axis[:interpolate]+ is truthy and there is more than one raw point) or
  # written at the positions of the raw samples in a new float NArray.
  #
  # NOTE(review): +new_size+ and the raw positions are floats; this presumably
  # relies on NArray truncating them to integers -- confirm the behaviour for
  # steps that are not exactly representable (e.g. 0.1).
  #
  # @param data [Hash] column name => Array or NArray, as built by +vectorize+
  # @param axis [Hash] axis spec with +:column+, +:range+, +:step+, +:interpolate+
  # @return [Hash] the same +data+ hash, modified in place
  def _process(data, axis)
    axis_col = axis[:column]
    step = axis[:step]
    range = axis[:range]
    interpolate = axis[:interpolate]

    new_size = (range.last - range.first) / step.to_f
    new_size += 1 unless range.exclude_end?

    raw_axis = data[axis_col]
    interpolate = false if raw_axis.size <= 1 # turn off interpolation of insufficient data

    axis_backup_key = "__#{axis_col}".to_sym
    data[axis_backup_key] = raw_axis

    interp = GSL::Interp.alloc("linear", raw_axis.size) if interpolate

    data[axis_col] = NArray.float(new_size).indgen!(range.first.to_f, step)

    # position of every raw sample on the new axis, used as an index below
    raw_positions = (raw_axis - range.first) / step.to_f
    data[:__raw_mask] = NArray.byte(new_size)
    data[:__raw_mask][raw_positions] = 1

    if interpolate
      # problem with GSL lack of NArray support on compilation, hence the
      # round trip through plain arrays; both axes are the same for every
      # column, so they are converted once here
      raw_axis_vector = GSL::Vector[raw_axis.to_a]
      data_vector = GSL::Vector[data[axis_col].to_a]
    end

    # data.keys is a snapshot, so the :__key backups added below are not visited
    data.keys.each do |k|
      next if k == axis_col || k == axis_backup_key

      v = data[k]
      # if first is a float, vector is a NArray and will be interpolated
      next unless v[0].is_a? Float

      data["__#{k}".to_sym] = v # backup data in :__key
      if interpolate
        v_vector = GSL::Vector[v.to_a]
        initialized = interp.init(raw_axis_vector, v_vector)
        data[k] = NArray.to_na initialized.eval(raw_axis_vector, v_vector, data_vector).to_a
      else
        data[k] = NArray.float(new_size)
        data[k][raw_positions] = v
      end
    end

    data
  end
end