/
math.go
236 lines (205 loc) · 6.36 KB
/
math.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
// Copyright 2021-2023
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package dataframe
import (
"math"
"github.com/rs/zerolog/log"
"gonum.org/v1/gonum/floats"
"gonum.org/v1/gonum/stat"
)
// AddScalar adds the scalar value to all columns in dataframe df and returns a new dataframe
// panics if rows are not equal.
func (df *DataFrame) AddScalar(scalar float64) *DataFrame {
df = df.Copy()
for colIdx := range df.ColNames {
for rowIdx := range df.Vals[colIdx] {
df.Vals[colIdx][rowIdx] += scalar
}
}
return df
}
// AddVec adds the vector to all columns in dataframe and returns a new dataframe
// panics if rows are not equal.
func (df *DataFrame) AddVec(vec []float64) *DataFrame {
df = df.Copy()
for idx := range df.ColNames {
floats.Add(df.Vals[idx], vec)
}
return df
}
// Count creates a new dataframe with the number of columns where the expression lambda func(float64) bool evaluates to true is placed
// in the `count` column
func (df *DataFrame) Count(lambda func(x float64) bool) *DataFrame {
res := &DataFrame{
Dates: df.Dates,
Vals: [][]float64{make([]float64, df.Len())},
ColNames: []string{"count"},
}
for rowIdx := range df.Dates {
cnt := 0
for _, col := range df.Vals {
if lambda(col[rowIdx]) {
cnt++
}
}
res.Vals[0][rowIdx] = float64(cnt)
}
return res
}
// Div divides all columns in `df` by the corresponding column in `other` and returns a new dataframe.
// Panics if rows are not equal.
func (df *DataFrame) Div(other *DataFrame) *DataFrame {
df = df.Copy()
otherMap := make(map[string]int, len(other.ColNames))
for idx, val := range other.ColNames {
otherMap[val] = idx
}
for idx, colName := range df.ColNames {
if otherIdx, ok := otherMap[colName]; ok {
floats.Div(df.Vals[idx], other.Vals[otherIdx])
}
}
return df
}
// Mean calculates the mean of all like columns in the dataframes and returns a new dataframe
// panics if rows are not equal.
func Mean(dfs ...*DataFrame) *DataFrame {
resDf := dfs[0].Copy()
otherMaps := make([]map[string]int, len(dfs))
for dfIdx, resDf := range dfs {
otherMaps[dfIdx] = make(map[string]int, len(resDf.ColNames))
for idx, val := range resDf.ColNames {
otherMaps[dfIdx][val] = idx
}
}
for resColIdx, colName := range resDf.ColNames {
for rowIdx := range resDf.Vals[0] {
row := 0.0
cnt := 0.0
for dfIdx := range dfs {
df := dfs[dfIdx]
colIdx := otherMaps[dfIdx][colName]
row += df.Vals[colIdx][rowIdx]
cnt++
}
resDf.Vals[resColIdx][rowIdx] = row / cnt
}
}
return resDf
}
// Mul multiplies all columns in dataframe df by the corresponding column in dataframe other and returns a new dataframe
// panics if rows are not equal.
func (df *DataFrame) Mul(other *DataFrame) *DataFrame {
df = df.Copy()
otherMap := make(map[string]int, len(other.ColNames))
for idx, val := range other.ColNames {
otherMap[val] = idx
}
for idx, colName := range df.ColNames {
if otherIdx, ok := otherMap[colName]; ok {
floats.Mul(df.Vals[idx], other.Vals[otherIdx])
}
}
return df
}
// MulScalar multiplies all columns in dataframe df by the scalar and returns a new dataframe
// panics if rows are not equal.
func (df *DataFrame) MulScalar(scalar float64) *DataFrame {
df = df.Copy()
for colIdx := range df.ColNames {
for rowIdx := range df.Vals[colIdx] {
df.Vals[colIdx][rowIdx] *= scalar
}
}
return df
}
// RollingSumScaled computes ∑ df[ii] * scalar and returns a new dataframe
// panics if rows are not equal.
func (df *DataFrame) RollingSumScaled(ii int, scalar float64) *DataFrame {
df2 := df.Copy()
for colIdx := range df.ColNames {
roll := 0.0
dropIdx := 0
for rowIdx := range df.Vals[colIdx] {
switch {
case rowIdx >= ii:
roll += df.Vals[colIdx][rowIdx]
roll -= df.Vals[colIdx][dropIdx]
df2.Vals[colIdx][rowIdx] = roll * scalar
dropIdx++
case rowIdx == (ii - 1):
roll += df.Vals[colIdx][rowIdx]
df2.Vals[colIdx][rowIdx] = roll * scalar
default:
df2.Vals[colIdx][rowIdx] = math.NaN()
roll += df.Vals[colIdx][rowIdx]
}
}
}
return df2
}
// SMA computes the simple moving average of all the columns in df for the specified
// lookback period. The length of the resulting dataframe equals that of the input with NaNs during the warm-up period.
// Invalid lookback periods result in a dataframe of all NaN.
// NOTE: lookback is in terms of date periods. if the dataframe is sampled monthly then SMA is monthly,
func (df *DataFrame) SMA(lookback int) *DataFrame {
// check that lookback is a valid period
if (lookback > df.Len()) || (lookback <= 0) {
log.Error().Stack().Int("Lookback", lookback).Int("NRows", df.Len()).Msg("lookback must be: 0 < lookback <= NRows")
nullDf := &DataFrame{
Dates: df.Dates,
Vals: make([][]float64, df.ColCount()),
ColNames: df.ColNames,
}
for colIdx := range nullDf.Vals {
nullDf.Vals[colIdx] = make([]float64, df.Len())
for rowIdx := range nullDf.Vals[colIdx] {
nullDf.Vals[colIdx][rowIdx] = math.NaN()
}
}
return nullDf
}
filterBank := make([][]float64, df.ColCount())
for idx := range filterBank {
filterBank[idx] = make([]float64, lookback)
}
smaVals := make([][]float64, df.ColCount())
for idx := range smaVals {
smaVals[idx] = make([]float64, df.Len())
}
warmup := true
for rowIdx := range df.Dates {
// if we have seen at least lookback rows then we are out of the warmup period
// NOTE: row is 0 based, lookback is 1 based; hence the test applied below
if rowIdx == (lookback - 1) {
warmup = false
}
filterBankIdx := rowIdx % lookback
for colIdx := range df.Vals {
filterBank[colIdx][filterBankIdx] = df.Vals[colIdx][rowIdx]
if warmup {
smaVals[colIdx][rowIdx] = math.NaN()
} else {
smaVals[colIdx][rowIdx] = stat.Mean(filterBank[colIdx], nil)
}
}
}
smaDf := &DataFrame{
Dates: df.Dates,
Vals: smaVals,
ColNames: df.ColNames,
}
return smaDf
}