-
Notifications
You must be signed in to change notification settings - Fork 0
/
stats.rb
119 lines (93 loc) · 2.84 KB
/
stats.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#Ruby: Statistical Arrays (See related posts)
require 'arrayx' # separate post
# Statistical methods for arrays. Also see NArray Ruby library.
class Float
  # Rounds the receiver to a fixed number of digits after the decimal point.
  #
  # The value is rendered with a "%f" conversion and the resulting text is
  # parsed back into a Float, so the rounding is whatever the formatter does.
  #
  # decimel_places - number of digits to keep after the decimal point.
  #
  # Returns a Float.
  def roundf(decimel_places)
    # The field width only pads the text with leading spaces, which the
    # to_f parse ignores.
    width = to_s.length
    format("%#{width}.#{decimel_places}f", self).to_f
  end
end
class Integer
  # Formats the integer with a thousands separator for easy reading,
  # e.g. 10000 -> "10,000" or 1000000 -> "1,000,000".
  #
  # delimiter - text inserted between each group of three digits
  #             (defaults to ','). It is inserted as given, so multi-character
  #             delimiters such as ', ' keep their character order.
  #
  # Returns a new String. A leading '-' on a negative number is kept and is
  # never immediately followed by a delimiter.
  def ts(delimiter = ',')
    # Match every digit that is followed by one or more complete three-digit
    # groups running to the end of the string, and append the delimiter to it.
    # Working purely on the decimal text keeps arbitrarily large integers
    # exact (no Float comparison is involved), and the block form of gsub
    # inserts the delimiter literally, without backslash interpretation.
    to_s.gsub(/\d(?=(?:\d{3})+\z)/) { |digit| "#{digit}#{delimiter}" }
  end
end
# Statistical helpers added directly to Array. Numeric methods assume the
# elements respond to the arithmetic operators they use.
class Array
  # Adds up the elements with +.
  #
  # Called with no arguments and no block, returns nil for an empty array and
  # otherwise folds the elements together with + (so strings concatenate).
  # This definition shadows Ruby's own Array#sum, so calls that pass an
  # initial value or a block are forwarded to the inherited Enumerable#sum
  # instead of failing with an ArgumentError.
  def sum(*args, &block)
    return super unless args.empty? && block.nil?

    inject(nil) { |acc, v| acc ? acc + v : v }
  end

  # Arithmetic mean of the elements, always computed as a Float.
  # An empty array yields NaN (0 / 0.0).
  def mean
    total = 0
    each { |v| total += v }
    total / size.to_f
  end

  # Population variance: the mean of the squared deviations from the mean
  # (divides by the number of elements, not by size - 1).
  def variance
    avg = mean
    squared = inject(0.0) { |acc, v| acc + (v - avg)**2 }
    squared / size
  end

  # Population standard deviation: the square root of #variance.
  def stdev
    Math.sqrt(variance)
  end

  # Returns a hash of objects and their frequencies within the array.
  # Objects missing from the hash read as 0 because the hash defaults to 0.
  #
  # This definition shadows Ruby's own Array#count, so calls that pass an
  # object or a block are forwarded to the inherited Enumerable#count and
  # still return an Integer.
  def count(*args, &block)
    return super unless args.empty? && block.nil?

    tallies = Hash.new(0)
    each { |obj| tallies[obj] += 1 }
    tallies
  end

  # Symmetric difference: given two arrays a and b, a ^ b returns a new array
  # of the objects found in exactly one of them (their union minus their
  # intersection).
  def ^(other)
    (self | other) - (self & other)
  end

  # Returns the frequency of x within the array (0 when x is absent).
  def freq(x)
    count[x]
  end

  # Returns the highest count of any object within the array
  # (nil for an empty array).
  def maxcount
    count.values.max
  end

  # Returns the lowest count of any object within the array
  # (nil for an empty array).
  def mincount
    count.values.min
  end

  # Returns a new, sorted array of the object(s) whose frequency is among the
  # x highest distinct counts within the array.
  def outliers(x)
    tallies = count
    # Smallest of the x largest distinct counts; anything rarer is dropped.
    threshold = tallies.values.uniq.sort.last(x).min
    # No threshold means nothing was requested (x == 0) or the array is empty.
    return [] if threshold.nil?

    tallies.reject { |_obj, n| n < threshold }.keys.sort
  end

  # Number of standard deviations that value lies from the mean of the
  # dataset.
  def zscore(value)
    (value - mean) / stdev
  end

  # Population covariance with other: mean of the element-wise products minus
  # the product of the two means.
  #
  # Raises ArgumentError when the arrays differ in size, since elements are
  # paired by index.
  def covariance(other)
    raise ArgumentError, 'arrays have different sizes' if size != other.size

    products = []
    each_with_index { |v, i| products << v * other[i] }
    products.mean - mean * other.mean
  end

  # Correlation coefficient with other: the covariance divided by the square
  # root of the product of the two variances.
  def correlation(other)
    covariance(other) / Math.sqrt(variance * other.variance)
  end
end
# Note: covariance pairs elements by index, so both arrays must be the same size.
# A guard such as the following enforces that:
#fail "arrays have different sizes" if (self.size != other.size)