forked from slmnhq/chorus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
boxplot_summary.rb
48 lines (43 loc) · 1.69 KB
/
boxplot_summary.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Collapses per-bucket rows into one box-plot summary per bucket.
#
# Every input row is a Hash carrying the keys :bucket, :min, :max and :count.
# NOTE(review): the quartile logic reads rows positionally, so rows sharing a
# bucket are presumably ordered from the lowest range to the highest (e.g. the
# output of an ntile query) -- confirm against the caller.
class BoxplotSummary
  # Arithmetic mean of two numbers, always returned as a Float.
  def self.mean(a, b)
    (a + b) / 2.0
  end

  # Builds one summary Hash per distinct bucket.
  #
  # @param i [Array<Hash>] rows with :bucket, :min, :max and :count
  # @param bins [Integer, nil] keep only the first +bins+ summaries after
  #   ordering; nil or a non-positive number keeps all of them
  # @return [Array<Hash>] summaries with :bucket, :count, :min, :median, :max,
  #   :first_quartile, :third_quartile and :percentage (a String such as
  #   "12.50%"), ordered by descending :count
  def self.summarize(i, bins)
    total = i.inject(0) { |sum, row| sum + row[:count] }

    # group_by keeps buckets in order of first appearance and each bucket's
    # rows in input order, without re-scanning the input once per bucket.
    summaries = i.group_by { |row| row[:bucket] }.map do |bucket, rows|
      count = rows.inject(0) { |sum, row| sum + row[:count] }
      # Guard the all-zero case so the share reads "0.00%" instead of "NaN%".
      share = total.zero? ? 0.0 : count.to_f / total * 100
      first_quartile, median, third_quartile = quartiles(rows)

      { :bucket => bucket,
        :count => count,
        :min => rows.first[:min],
        :median => median,
        :max => rows.last[:max],
        :first_quartile => first_quartile,
        :third_quartile => third_quartile,
        # "%%" is the portable spelling of a literal percent sign.
        :percentage => format('%0.2f%%', share) }
    end

    # Rank on the numeric count: comparing the formatted percentage strings
    # would order them lexicographically ("9.00%" above "10.00%"). The index
    # keeps equal counts in first-appearance order.
    ranked = summaries.each_with_index
                      .sort_by { |summary, index| [-summary[:count], index] }
                      .map { |summary, _index| summary }

    bins.is_a?(Numeric) && bins > 0 ? ranked[0..bins - 1] : ranked
  end

  # Returns [first_quartile, median, third_quartile] for one bucket's rows.
  # With one to three rows the values are derived from the rows' :min values;
  # with four or more, from the boundaries between the first four rows.
  def self.quartiles(rows)
    case rows.length
    when 1
      value = rows[0][:min]
      [value, value, value]
    when 2
      low = rows[0][:min]
      high = rows[1][:min]
      median = mean(low, high)
      [mean(low, median), median, mean(high, median)]
    when 3
      middle = rows[1][:min]
      [mean(rows[0][:min], middle), middle, mean(middle, rows[2][:min])]
    else
      [mean(rows[0][:max], rows[1][:min]),
       mean(rows[1][:max], rows[2][:min]),
       mean(rows[2][:max], rows[3][:min])]
    end
  end
  private_class_method :quartiles
end