Skip to content

Commit

Permalink
perf(store): implement binary search to log scanner
Browse files Browse the repository at this point in the history
  • Loading branch information
macrat committed Dec 23, 2022
1 parent aa158bf commit 94e9d48
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 15 deletions.
3 changes: 3 additions & 0 deletions internal/store/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ func newFileScanner(path string, since, until time.Time) (*fileScanner, error) {
if err != nil {
return nil, err
}
if err := searchLog(f, since.Add(-70*time.Minute), 10*1024); err != nil {
return nil, err
}
return &fileScanner{
file: f,
reader: bufio.NewReader(f),
Expand Down
93 changes: 93 additions & 0 deletions internal/store/search.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package store

import (
"bytes"
"encoding/json"
"errors"
"io"
"time"

api "github.com/macrat/ayd/lib-ayd"
)

// searchLog seeks and searches a log file roughly.
// You should search the time at least 1 hour before from your actual target time, because the record ordering in log files might swap maximum 1 hour.
//
// The result can contains error up to `accuracy` bytes.
// If the `accuracy` is 0, the result is perfectly aligned to the begin of the found record.
func searchLog(f io.ReadSeeker, target time.Time, accuracy int64) error {
size, err := f.Seek(0, io.SeekEnd)
if err != nil {
return err
}
if size <= accuracy {
_, err := f.Seek(0, io.SeekStart)
return err
}

right := size
var left int64
mid := right / 2

buf := make([]byte, 256)

var rec struct {
Time string `json:"time"`
}

for left+accuracy < right {
if _, err := f.Seek(mid, io.SeekStart); err != nil {
return err
}

n, err := f.Read(buf[:])
if errors.Is(err, io.EOF) {
right = left + (right-left)/2
continue
} else if err != nil {
return err
}
line := buf[:n]
if idx := bytes.IndexRune(line, '\n'); idx < 0 {
buf = make([]byte, len(buf)*2)
continue
} else if idx == 0 {
mid += 1
continue
} else {
line = line[:idx]
}

if line[0] == '{' {
var t time.Time
err = json.Unmarshal(line, &rec)
if err == nil {
t, err = api.ParseTime(rec.Time)
}

if err == nil {
if target.After(t) {
left = mid + int64(len(line)) + 1
} else {
right = left + (right-left)/2
}

mid = left + (right-left)/2
continue
}
}

// The current position is not valid JSON.
mid += int64(len(line)) + 1
if mid >= right {
right = left + (right-left)/2
mid = left + (right-left)/2
}
}

if _, err := f.Seek(left, io.SeekStart); err != nil {
return err
}

return nil
}
127 changes: 127 additions & 0 deletions internal/store/search_internal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package store

import (
"fmt"
"os"
"path/filepath"
"testing"
"time"

api "github.com/macrat/ayd/lib-ayd"
)

func Test_searchLog(t *testing.T) {
t.Parallel()

type Test struct {
target time.Time
pos int64
}
tests := []struct {
fname string
tests []Test
}{
{
"../testutil/testdata/test.log",
[]Test{
{time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC), 0},
{time.Date(2100, 1, 1, 0, 0, 0, 0, time.UTC), 980},
{time.Date(2021, 1, 2, 15, 4, 5, 0, time.UTC), 0},
{time.Date(2021, 1, 2, 15, 4, 6, 0, time.UTC), 261},
{time.Date(2021, 1, 2, 15, 4, 7, 0, time.UTC), 538},
{time.Date(2021, 1, 2, 15, 4, 8, 0, time.UTC), 669},
{time.Date(2021, 1, 2, 15, 4, 9, 0, time.UTC), 817},
},
},
{
"./testdata/long-line.log",
[]Test{
{time.Date(2022, 1, 1, 0, 0, 0, 0, time.UTC), 0},
{time.Date(2022, 1, 1, 1, 0, 0, 0, time.UTC), 514},
{time.Date(2022, 1, 1, 2, 0, 0, 0, time.UTC), 1028},
{time.Date(2022, 1, 1, 3, 0, 0, 0, time.UTC), 1542},
{time.Date(2022, 1, 1, 4, 0, 0, 0, time.UTC), 2056},
{time.Date(2022, 1, 1, 5, 0, 0, 0, time.UTC), 2570},
{time.Date(2022, 1, 1, 6, 0, 0, 0, time.UTC), 3596},
{time.Date(2022, 1, 1, 7, 0, 0, 0, time.UTC), 4622},
},
},
{
"./testdata/empty-line.log",
[]Test{
{time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC), 0},
{time.Date(2001, 1, 1, 0, 0, 0, 0, time.UTC), 111},
{time.Date(2022, 1, 1, 0, 0, 0, 0, time.UTC), 229},
{time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC), 360},
},
},
}

for _, tt := range tests {
fname := tt.fname
tests := tt.tests

t.Run(fname, func(t *testing.T) {
f, err := os.Open(fname)
if err != nil {
t.Fatalf("failed to open log: %s", err)
}
defer f.Close()

for _, tt := range tests {
err := searchLog(f, tt.target, 0)
if err != nil {
t.Errorf("%s: unexpected error: %s", tt.target, err)
continue
}

pos, err := f.Seek(0, os.SEEK_CUR)
if err != nil {
t.Errorf("%s: failed to current position: %s", tt.target, err)
continue
}
if pos != tt.pos {
t.Errorf("%s: unexpected position: want=%d actual=%d", tt.target, tt.pos, pos)
}
}
})
}
}

func Benchmark_searchLog(b *testing.B) {
p := filepath.Join(b.TempDir(), "test.log")

baseTime := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
recordsNum := 10000
logSize := 0

f, err := os.Create(p)
if err != nil {
b.Fatalf("failed to prepare test log: %s", err)
}
for i := 0; i < recordsNum; i++ {
r := api.Record{
Time: baseTime.Add(time.Duration(i) * time.Minute),
Message: fmt.Sprintf("record %d", i),
}
if n, err := f.Write([]byte(r.String() + "\n")); err != nil {
b.Fatalf("failed to prepare test log: %s", err)
} else {
logSize += n
}
}
f.Close()

b.SetBytes(int64(logSize))

f, err = os.Open(p)
if err != nil {
b.Fatalf("failed to open test log: %s", err)
}
defer f.Close()

b.ResetTimer()
for i := 0; i < b.N; i++ {
searchLog(f, baseTime.Add(time.Duration(i*1234%recordsNum)*time.Minute), 1024)
}
}
2 changes: 1 addition & 1 deletion internal/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const (
)

var (
LogRestoreBytes = int64(100 * 1024 * 1024)
LogRestoreBytes = int64(10 * 1024 * 1024)
)

type RecordHandler func(api.Record)
Expand Down
30 changes: 30 additions & 0 deletions internal/store/testdata/empty-line.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{"time":"2000-01-01T00:00:00Z","status":"HEALTHY","latency":123.456,"target":"dummy:","message":"hello world"}







{"time":"2001-01-01T00:00:00Z","status":"HEALTHY","latency":123.456,"target":"dummy:","message":"hello world"}




















{"time":"2022-01-01T00:00:00Z","status":"HEALTHY","latency":123.456,"target":"dummy:","message":"hello world"}
8 changes: 8 additions & 0 deletions internal/store/testdata/long-line.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{"time":"2022-01-01T00:00:00Z", "status":"HEALTHY", "latency":123.456, "target":"dummy:", "message":"this is a long long record", "extra": "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"}
{"time":"2022-01-01T01:00:00Z", "status":"HEALTHY", "latency":123.456, "target":"dummy:", "message":"this is a long long record", "extra": "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"}
{"time":"2022-01-01T02:00:00Z", "status":"HEALTHY", "latency":123.456, "target":"dummy:", "message":"this is a long long record", "extra": "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"}
{"time":"2022-01-01T03:00:00Z", "status":"HEALTHY", "latency":123.456, "target":"dummy:", "message":"this is a long long record", "extra": "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"}
{"time":"2022-01-01T04:00:00Z", "status":"HEALTHY", "latency":123.456, "target":"dummy:", "message":"this is a long long record", "extra": "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"}
{"time":"2022-01-01T05:00:00Z", "status":"HEALTHY", "latency":123.456, "target":"dummy:", "message":"this is a long long record", "extra": "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"}
{"time":"2022-01-01T06:00:00Z", "status":"HEALTHY", "latency":123.456, "target":"dummy:", "message":"this is a long long record", "extra": "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"}
{"time":"2022-01-01T07:00:00Z", "status":"HEALTHY", "latency":123.456, "target":"dummy:", "message":"this is a long long record", "extra": "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"}
35 changes: 21 additions & 14 deletions lib-ayd/time.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,41 +8,48 @@ import (
)

var (
dateformats = []string{
timeformats []string

ErrInvalidTime = errors.New("invalid format")
)

func init() {
dfs := []string{
"2006-01-02T",
"2006-01-02_",
"2006-01-02 ",
"20060102 ",
"20060102T",
"20060102_",
}
timeformats = []string{
tfs := []string{
"15:04:05",
"15:04:05.999999999",
"150405",
"150405.999999999",
}
zoneformats = []string{
zfs := []string{
"Z07:00",
"Z0700",
"Z07",
}

ErrInvalidTime = errors.New("invalid format")
)
for _, df := range dfs {
for _, tf := range tfs {
for _, zf := range zfs {
timeformats = append(timeformats, df+tf+zf)
}
}
}
}

// ParseTime parses time string in Ayd way.
// This function supports RFC3339 and some variant formats.
func ParseTime(s string) (time.Time, error) {
x := strings.ToUpper(strings.TrimSpace(s))
for _, df := range dateformats {
for _, tf := range timeformats {
for _, zf := range zoneformats {
t, err := time.Parse(df+tf+zf, x)
if err == nil {
return t, nil
}
}
for _, f := range timeformats {
t, err := time.Parse(f, x)
if err == nil {
return t, nil
}
}
return time.Time{}, fmt.Errorf("%w: %q", ErrInvalidTime, s)
Expand Down

0 comments on commit 94e9d48

Please sign in to comment.