forked from rocketlaunchr/dataframe-go
/
describe_series.go
94 lines (76 loc) · 1.97 KB
/
describe_series.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
// Copyright 2018-20 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved.
package pandas
import (
"context"
"math"
"sort"
"gonum.org/v1/gonum/stat"
dataframe "github.com/rocketlaunchr/dataframe-go"
)
// describeSeries computes summary statistics for a single series.
//
// Count and NilCount are always populated. Median, Mean, StdDev and
// Percentiles are populated only when s is a *dataframe.SeriesFloat64 or
// implements dataframe.ToSeriesFloat64; NaN values are dropped before the
// statistics are computed. Min and Max are additionally set only when at
// least one non-NaN value remains.
//
// Only the first element of opts is consulted. Calling the function without
// any options is valid and simply requests no percentiles.
//
// An error is returned only when the conversion to a float64 series fails
// and yields no series at all.
func describeSeries(ctx context.Context, s dataframe.Series, opts ...DescribeOptions) (DescribeOutput, error) {
	// NOTE(review): the error from NilCount is discarded, as in the original
	// code; confirm it cannot fail when called without options.
	nc, _ := s.NilCount()

	out := DescribeOutput{
		headers:  []string{s.Name()},
		Count:    []int{s.NRows()},
		NilCount: []int{nc},
	}
	// opts is variadic, so it may be empty; indexing opts[0] unconditionally
	// would panic for callers that pass no options.
	if len(opts) > 0 {
		out.percentiles = opts[0].Percentiles
	}

	var sf *dataframe.SeriesFloat64
	switch v := s.(type) {
	case *dataframe.SeriesFloat64:
		sf = v
	case dataframe.ToSeriesFloat64:
		var err error
		sf, err = v.ToSeriesFloat64(ctx, false)
		// A conversion error is tolerated as long as a series was still
		// returned; only a missing result aborts the description.
		if err != nil && sf == nil {
			return DescribeOutput{}, err
		}
	default:
		// Not convertible to float64: only the counts can be reported.
		return out, nil
	}

	// Collect the non-NaN values, ordered from lowest to highest.
	vals := make([]float64, 0, len(sf.Values))
	for _, v := range sf.Values {
		if !math.IsNaN(v) {
			vals = append(vals, v)
		}
	}
	sort.Float64s(vals)

	out.Median = []float64{quantileOrNaN(0.5, vals)}
	out.Mean = []float64{stat.Mean(vals, nil)}
	out.StdDev = []float64{stat.StdDev(vals, nil)}

	// One row of percentile values for this series, in the order requested.
	row := make([]float64, 0, len(out.percentiles))
	for _, p := range out.percentiles {
		row = append(row, quantileOrNaN(p, vals))
	}
	out.Percentiles = append(out.Percentiles, row)

	if len(vals) > 0 {
		out.Min = []float64{vals[0]}
		out.Max = []float64{vals[len(vals)-1]}
	}

	return out, nil
}

// quantileOrNaN returns the empirical p-quantile of sorted, which must be in
// ascending order. If stat.Quantile panics (gonum documents panics for empty
// input and for p outside [0, 1]), the panic is recovered and NaN is returned
// instead.
func quantileOrNaN(p float64, sorted []float64) (q float64) {
	defer func() {
		if recover() != nil {
			q = math.NaN()
		}
	}()
	return stat.Quantile(p, stat.Empirical, sorted, nil)
}