Skip to content

Commit

Permalink
Add support for multiples of duration, like pandas
Browse files Browse the repository at this point in the history
This change lets you specify a bin size that is a multiple of a duration
unit. Instead of only a bare unit like `minute`, you can prefix the unit
with a multiplier, e.g. `10minute`, which causes the data to be divided
into 10-minute bins.
  • Loading branch information
lelandbatey committed Oct 30, 2021
1 parent dbaa984 commit c638509
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 17 deletions.
65 changes: 48 additions & 17 deletions tbin/tbin.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package tbin
import (
"fmt"
"sort"
"strconv"
"unicode"
)

const TD_1_ms int64 = 1
Expand Down Expand Up @@ -72,22 +74,19 @@ var ABBREV_TO_CHARTJS_UNIT map[string]string = map[string]string{
// giving you the "bin" where this timestamp belongs in a histogram with bins
// of the size described by 'spec' (an optional multiplier followed by a unit,
// e.g. "30m"). If 'spec' does not stand for a known bin-size, then an error
// is returned.
func BinTimestamp(ts int64, freq string) (int64, error) {
abbrev, ok := TIMEDELTA_ABBREVS[freq]
if !ok {
return 0, fmt.Errorf("no timedelta configured for frequency of %q", freq)
}
delt, ok := ABBREV_TO_DELT[abbrev]
if !ok {
return 0, fmt.Errorf("no timedelta configured for frequency of %q leading to abbrev %q", freq, abbrev)
func BinTimestamp(ts int64, spec string) (int64, error) {
mult, delt, err := ParseSpec(spec)
if err != nil {
return 0, err
}
return (ts / delt) * delt, nil
d := delt * mult
return (ts / d) * d, nil
}

func BinTimestamps(tss []int64, freq string) (map[int64]int64, error) {
func BinTimestamps(tss []int64, spec string) (map[int64]int64, error) {
hist := map[int64]int64{}
for _, ts := range tss {
bin, err := BinTimestamp(ts, freq)
bin, err := BinTimestamp(ts, spec)
if err != nil {
return nil, err
}
Expand All @@ -97,15 +96,17 @@ func BinTimestamps(tss []int64, freq string) (map[int64]int64, error) {
hist[bin] = hist[bin] + 1
}
sort.SliceStable(tss, func(i, j int) bool { return tss[i] < tss[j] })
// We can ignore errors because if there were errors they'd have been
// caught on these inputs already.
delt := ABBREV_TO_DELT[TIMEDELTA_ABBREVS[freq]]
minbin, _ := BinTimestamp(tss[0], freq)
maxbin, _ := BinTimestamp(tss[len(tss)-1], freq)
mult, basedelt, err := ParseSpec(spec)
if err != nil {
return nil, err
}
delt := mult * basedelt
minbin, _ := BinTimestamp(tss[0], spec)
maxbin, _ := BinTimestamp(tss[len(tss)-1], spec)
cur := minbin
for cur < maxbin {
cur += delt
cb, _ := BinTimestamp(cur, freq)
cb, _ := BinTimestamp(cur, spec)
if _, ok := hist[cb]; !ok {
hist[cb] = 0
}
Expand Down Expand Up @@ -158,3 +159,33 @@ func EstimateBinSize(tss []int64) (string, string) {
jsunit := ABBREV_TO_CHARTJS_UNIT[unit]
return unit, jsunit
}

// ParseSpec splits a bin-size spec such as "30m" into its integer multiplier
// and the base timedelta of its unit.
//
// A spec is an optional run of leading decimal digits (the multiplier, which
// defaults to 1 when absent) followed by a unit abbreviation. The abbreviation
// is everything after the leading digits and must be a key of
// TIMEDELTA_ABBREVS whose value is in turn a key of ABBREV_TO_DELT.
//
// An error is returned, with mult and delt both zero, when:
//   - the multiplier cannot be parsed as a base-10 int64,
//   - the multiplier is zero (callers divide by mult*delt),
//   - the abbreviation is unknown, or
//   - mult*delt would not fit in an int64.
//
// Digits that appear after the first non-digit character are treated as part
// of the abbreviation, so malformed specs like "m5" or "1m0" are rejected
// instead of being silently read as "5m" or "10m".
func ParseSpec(unit string) (mult int64, delt int64, err error) {
	const maxInt64 = int64(1<<63 - 1)

	// Find the byte offset of the first rune that is not a digit; everything
	// before it is the multiplier, everything from it onward is the unit.
	split := len(unit)
	for i, r := range unit {
		if !unicode.IsDigit(r) {
			split = i
			break
		}
	}
	digits, letters := unit[:split], unit[split:]

	mult = 1
	if digits != "" {
		mult, err = strconv.ParseInt(digits, 10, 64)
		if err != nil {
			return 0, 0, fmt.Errorf("parsing multiplier of spec %q: %w", unit, err)
		}
		if mult == 0 {
			return 0, 0, fmt.Errorf("multiplier of spec %q must be greater than zero", unit)
		}
	}

	abbrev, ok := TIMEDELTA_ABBREVS[letters]
	if !ok {
		return 0, 0, fmt.Errorf("no timedelta configured for abbreviation of %q", letters)
	}
	delt, ok = ABBREV_TO_DELT[abbrev]
	if !ok {
		return 0, 0, fmt.Errorf("no timedelta configured for frequency of %q leading to abbrev %q", letters, abbrev)
	}

	// Callers multiply mult by delt to get the bin width; refuse specs whose
	// product would wrap around int64.
	if delt > 0 && mult > maxInt64/delt {
		return 0, 0, fmt.Errorf("spec %q overflows the representable bin size", unit)
	}
	return mult, delt, nil
}
61 changes: 61 additions & 0 deletions tbin/tbin_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package tbin

import (
"fmt"
"testing"

"github.com/stretchr/testify/require"
)

// TestParseSpec runs ParseSpec over a table of specs and compares the
// returned error, multiplier and base timedelta with the expected values.
// Fields left out of a table entry take their zero value (nil error, 0).
func TestParseSpec(t *testing.T) {
	type expectation struct {
		input string
		mult  int64
		delt  int64
		err   error
	}

	cases := []expectation{
		{input: "30D", mult: 30, delt: TD_1_day},
		{input: "30m", mult: 30, delt: TD_1_min},
		{input: "5Y", mult: 5, delt: TD_1_day * 365},
		{input: "1W", mult: 1, delt: TD_1_week},
		// No digits at all: the multiplier defaults to 1.
		{input: "m", mult: 1, delt: TD_1_min},
		// Digits only: there is no unit to look up, so parsing fails.
		{input: "1", err: fmt.Errorf("no timedelta configured for abbreviation of \"\"")},
	}

	for i := range cases {
		want := cases[i]
		gotMult, gotDelt, gotErr := ParseSpec(want.input)
		require.Equal(t, want.err, gotErr, "for test #%d", i)
		require.Equal(t, want.mult, gotMult, "for test #%d", i)
		require.Equal(t, want.delt, gotDelt, "for test #%d", i)
	}
}

0 comments on commit c638509

Please sign in to comment.