@@ -37,21 +37,21 @@ struct Summary {
37
37
///|
38
38
fn Summary ::new (
39
39
name ? : String ,
40
- data : Array [Double ],
40
+ sorted_data ~ : Array [Double ],
41
41
batch_size : Int
42
42
) -> Summary {
43
- let sum = sum (data )
44
- let min = min (data )
45
- let max = max (data )
46
- let mean = mean (data )
47
- let median = median (data )
48
- let var = var (data )
49
- let std_dev = std_dev (data )
50
- let std_dev_pct = std_dev_pct (data )
51
- let median_abs_dev = median_abs_dev (data )
52
- let median_abs_dev_pct = median_abs_dev_pct (data )
53
- let quartiles = quartiles (data )
54
- let iqr = iqr (data )
43
+ let sum = sum (sorted_data )
44
+ let min = min (sorted_data ~ )
45
+ let max = max (sorted_data ~ )
46
+ let mean = mean (sorted_data , sum ~ )
47
+ let median = median (sorted_data ~ )
48
+ let var = var (sorted_data , mean ~ )
49
+ let std_dev = std_dev (var ~ )
50
+ let std_dev_pct = std_dev_pct (mean ~, std_dev ~ )
51
+ let median_abs_dev = median_abs_dev (sorted_data , median_ = median )
52
+ let median_abs_dev_pct = median_abs_dev_pct (median ~, median_abs_dev ~ )
53
+ let quartiles = quartiles (sorted_data ~ )
54
+ let iqr = iqr (quartiles ~ )
55
55
{
56
56
name ,
57
57
sum ,
@@ -67,7 +67,7 @@ fn Summary::new(
67
67
quartiles ,
68
68
iqr ,
69
69
batch_size ,
70
- runs : data .length (),
70
+ runs : sorted_data .length (),
71
71
}
72
72
}
73
73
@@ -81,45 +81,31 @@ fn sum(data : Array[Double]) -> Double {
81
81
}
82
82
83
83
///|
84
- fn min (data : Array [Double ]) -> Double {
85
- let mut min = data [0 ]
86
- for i in data {
87
- if i < min {
88
- min = i
89
- }
90
- }
91
- min
84
+ fn min (sorted_data ~ : Array [Double ]) -> Double {
85
+ sorted_data [0 ]
92
86
}
93
87
94
88
///|
95
- fn max (data : Array [Double ]) -> Double {
96
- let mut max = data [0 ]
97
- for i in data {
98
- if i > max {
99
- max = i
100
- }
101
- }
102
- max
89
+ fn max (sorted_data ~ : Array [Double ]) -> Double {
90
+ sorted_data [sorted_data .length () - 1 ]
103
91
}
104
92
105
93
///|
106
- fn mean (data : Array [Double ]) -> Double {
107
- let sum = sum (data )
94
+ fn mean (data : Array [Double ], sum ~ : Double ) -> Double {
108
95
let count = data .length ()
109
96
sum / count .to_double ()
110
97
}
111
98
112
99
///|
113
- fn median (data : Array [Double ]) -> Double {
114
- percentile (data , 50.0 )
100
+ fn median (sorted_data ~ : Array [Double ]) -> Double {
101
+ percentile (sorted_data ~, pct = 50.0 )
115
102
}
116
103
117
104
///|
118
- fn var (data : Array [Double ]) -> Double {
105
+ fn var (data : Array [Double ], mean ~ : Double ) -> Double {
119
106
if data .length () < 2 {
120
107
return 0.0
121
108
}
122
- let mean = mean (data )
123
109
let mut v = 0.0
124
110
for i in data {
125
111
let d = i - mean
@@ -129,102 +115,101 @@ fn var(data : Array[Double]) -> Double {
129
115
}
130
116
131
117
///|
132
- fn std_dev (data : Array [ Double ] ) -> Double {
133
- var ( data ) .sqrt ()
118
+ fn std_dev (var ~ : Double ) -> Double {
119
+ var .sqrt ()
134
120
}
135
121
136
122
///|
137
- fn std_dev_pct (data : Array [Double ]) -> Double {
138
- let mean = mean (data )
139
- let std_dev = std_dev (data )
123
+ fn std_dev_pct (mean ~ : Double , std_dev ~ : Double ) -> Double {
140
124
if mean == 0.0 {
141
125
return 0.0
142
126
}
143
127
std_dev / mean * 100.0
144
128
}
145
129
146
130
///|
147
- fn median_abs_dev (data : Array [Double ]) -> Double {
148
- let med = median ( data )
149
- let abs_devs = data . map ( fn ( x ) { ( med - x ). abs () } )
131
+ fn median_abs_dev (data : Array [Double ], median_ ~ : Double ) -> Double {
132
+ let abs_devs = data . map ( fn ( x ) { ( median_ - x ). abs () } )
133
+ abs_devs . sort ( )
150
134
// https://en.wikipedia.org/wiki/Median_absolute_deviation
151
- median (abs_devs ) * 1.4826
135
+ median (sorted_data = abs_devs ) * 1.4826
152
136
}
153
137
154
138
///|
155
- fn median_abs_dev_pct (data : Array [Double ]) -> Double {
156
- let med = median (data )
157
- let mad = median_abs_dev (data )
158
- if med == 0.0 {
139
+ fn median_abs_dev_pct (median ~ : Double , median_abs_dev ~ : Double ) -> Double {
140
+ if median == 0.0 {
159
141
return 0.0
160
142
}
161
- mad / med * 100.0
143
+ median_abs_dev / median * 100.0
162
144
}
163
145
164
146
///|
165
- fn quartiles (data : Array [Double ]) -> (Double , Double , Double ) {
166
- let sorted = data .copy ()
167
- sorted .sort ()
168
- try {
169
- let q1 = percentile_of_sorted (sorted , 25.0 )
170
- let q2 = percentile_of_sorted (sorted , 50.0 )
171
- let q3 = percentile_of_sorted (sorted , 75.0 )
172
- (q1 , q2 , q3 )
173
- } catch {
174
- _ => panic ()
175
- }
147
+ fn quartiles (sorted_data ~ : Array [Double ]) -> (Double , Double , Double ) {
148
+ let q1 = percentile (sorted_data ~, pct = 25.0 )
149
+ let q2 = percentile (sorted_data ~, pct = 50.0 )
150
+ let q3 = percentile (sorted_data ~, pct = 75.0 )
151
+ (q1 , q2 , q3 )
176
152
}
177
153
178
154
///|
179
- fn iqr (data : Array [ Double ] ) -> Double {
180
- let (q1 , _ , q3 ) = quartiles ( data )
155
+ fn iqr (quartiles ~ : ( Double , Double , Double ) ) -> Double {
156
+ let (q1 , _ , q3 ) = quartiles
181
157
q3 - q1
182
158
}
183
159
184
160
///|
185
- fn percentile_of_sorted ( data : Array [Double ], pct : Double ) -> Double ! {
186
- assert_false ( data . is_empty ())
187
- assert_true ( pct >= 0.0 && pct <= 100.0 )
188
- if data .length () == 1 {
189
- return data [0 ]
161
+ fn percentile ( sorted_data ~ : Array [Double ], pct ~ : Double ) -> Double {
162
+ guard sorted_data . length () > 0
163
+ guard pct >= 0.0 && pct <= 100.0
164
+ if sorted_data .length () == 1 {
165
+ return sorted_data [0 ]
190
166
}
191
167
if pct == 100.0 {
192
- return data [ data .length () - 1 ]
168
+ return sorted_data [ sorted_data .length () - 1 ]
193
169
}
194
- let length = (data .length () - 1 ).to_double ()
170
+ let length = (sorted_data .length () - 1 ).to_double ()
195
171
let rank = pct / 100 * length
196
172
let lrank = rank .floor ()
197
173
let d = rank - lrank
198
174
let n = lrank .to_int ()
199
- let lo = data [n ]
200
- let hi = data [n + 1 ]
175
+ let lo = sorted_data [n ]
176
+ let hi = sorted_data [n + 1 ]
201
177
lo + (hi - lo ) * d
202
178
}
203
179
204
180
///|
205
- fn percentile (data : Array [Double ], pct : Double ) -> Double {
206
- let sorted = data .copy ()
207
- sorted .sort ()
208
- try percentile_of_sorted (sorted , pct ) catch {
209
- _ => panic ()
181
+ fn winsorize (sorted_data ~ : Array [Double ], pct : Double ) -> Unit {
182
+ let lo = percentile (sorted_data ~, pct ~)
183
+ let hi = percentile (sorted_data ~, pct = 100.0 - pct )
184
+ for i , samp in sorted_data {
185
+ if samp > hi {
186
+ sorted_data [i ] = hi
187
+ } else if samp < lo {
188
+ sorted_data [i ] = lo
189
+ }
210
190
}
211
191
}
212
192
213
193
///|
214
- fn winsorize (data : Array [Double ], pct : Double ) -> Unit {
215
- let sorted = data .copy ()
216
- sorted .sort ()
217
- try {
218
- let lo = percentile_of_sorted (sorted , pct )
219
- let hi = percentile_of_sorted (sorted , 100.0 - pct )
220
- for i , samp in data {
221
- if samp > hi {
222
- data [i ] = hi
223
- } else if samp < lo {
224
- data [i ] = lo
225
- }
226
- }
227
- } catch {
228
- _ => panic ()
229
- }
194
+ test {
195
+ let data = [1.1 , 21.4 , 2.2 , 3.3 , 4.5 , 12.5 , 33.3 , 14.4 ]
196
+ data .sort ()
197
+ let summary = Summary ::new (sorted_data = data , 3 )
198
+ assert_true (summary .sum.is_close (92.7 ))
199
+ assert_true (summary .min.is_close (1.1 ))
200
+ assert_true (summary .max.is_close (33.3 ))
201
+ assert_true (summary .mean.is_close (11.5875 ))
202
+ assert_true (summary .median.is_close (8.5 ))
203
+ assert_true (summary .var.is_close (127.64125 ))
204
+ assert_true (summary .std_dev.is_close (11.297842714430043 ))
205
+ assert_true (summary .std_dev_pct.is_close (97.50026075020534 ))
206
+ assert_true (summary .median_abs_dev.is_close (9.04386 ))
207
+ assert_true (summary .median_abs_dev_pct.is_close (106.39835294117646 ))
208
+ let (q1 , q2 , q3 ) = summary .quartiles
209
+ assert_true (q1 .is_close (3.025 ))
210
+ assert_true (q2 .is_close (8.5 ))
211
+ assert_true (q3 .is_close (16.15 ))
212
+ assert_true (summary .iqr.is_close (13.125 ))
213
+ assert_true (summary .batch_size == 3 )
214
+ assert_true (summary .runs == 8 )
230
215
}
0 commit comments