-
Notifications
You must be signed in to change notification settings - Fork 34
Fix basic pipeline test + Pipeline benchmark #104
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| /* | ||
| * Copyright (C) 2021 IBM, Inc. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| * | ||
| */ | ||
|
|
||
| package ingest | ||
|
|
||
| import ( | ||
| "bufio" | ||
| "fmt" | ||
| "os" | ||
|
|
||
| "github.com/netobserv/flowlogs2metrics/pkg/config" | ||
| "github.com/netobserv/flowlogs2metrics/pkg/pipeline/utils" | ||
| log "github.com/sirupsen/logrus" | ||
| ) | ||
|
|
||
// chunkLines is the fixed number of file lines forwarded per chunk.
const chunkLines = 100

// FileChunks ingests entries from a file and resends them in chunks of a fixed
// number of lines (chunkLines). It might be used to test processing speed in pipelines.
type FileChunks struct {
	// fileName is the path of the input file to read.
	fileName string
	// PrevRecords holds the lines of the most recently forwarded chunk.
	// NOTE(review): Ingest reuses the underlying buffer between chunks, so this
	// slice's contents are only stable after Ingest returns — confirm callers
	// only inspect it then.
	PrevRecords []interface{}
	// TotalRecords accumulates the number of lines forwarded across all chunks.
	TotalRecords int
}
|
|
||
| func (r *FileChunks) Ingest(process ProcessFunction) { | ||
| lines := make([]interface{}, 0, chunkLines) | ||
| file, err := os.Open(r.fileName) | ||
| if err != nil { | ||
| log.Fatal(err) | ||
| } | ||
| defer func() { | ||
| _ = file.Close() | ||
| }() | ||
|
|
||
| scanner := bufio.NewScanner(file) | ||
| nLines := 0 | ||
| for scanner.Scan() { | ||
| text := scanner.Text() | ||
| lines = append(lines, text) | ||
| nLines++ | ||
| if nLines%chunkLines == 0 { | ||
| r.PrevRecords = lines | ||
| r.TotalRecords += len(lines) | ||
| process(lines) | ||
| // reset slice length without deallocating/reallocating memory | ||
| lines = lines[:0] | ||
| } | ||
| } | ||
| if len(lines) > 0 { | ||
| r.PrevRecords = lines | ||
| r.TotalRecords += len(lines) | ||
| process(lines) | ||
| } | ||
| } | ||
|
|
||
| // NewFileChunks create a new ingester that sends entries in chunks of fixed number of lines. | ||
| func NewFileChunks() (Ingester, error) { | ||
| log.Debugf("entering NewIngestFile") | ||
| if config.Opt.PipeLine.Ingest.File.Filename == "" { | ||
| return nil, fmt.Errorf("ingest filename not specified") | ||
| } | ||
|
|
||
| log.Infof("input file name = %s", config.Opt.PipeLine.Ingest.File.Filename) | ||
|
|
||
| ch := make(chan bool, 1) | ||
| utils.RegisterExitChannel(ch) | ||
| return &FileChunks{ | ||
| fileName: config.Opt.PipeLine.Ingest.File.Filename, | ||
| }, nil | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,15 +18,18 @@ | |
| package pipeline | ||
|
|
||
| import ( | ||
| "github.com/json-iterator/go" | ||
| "testing" | ||
|
|
||
| "github.com/sirupsen/logrus" | ||
|
|
||
| jsoniter "github.com/json-iterator/go" | ||
| "github.com/netobserv/flowlogs2metrics/pkg/config" | ||
| "github.com/netobserv/flowlogs2metrics/pkg/pipeline/decode" | ||
| "github.com/netobserv/flowlogs2metrics/pkg/pipeline/ingest" | ||
| "github.com/netobserv/flowlogs2metrics/pkg/pipeline/transform" | ||
| "github.com/netobserv/flowlogs2metrics/pkg/pipeline/write" | ||
| "github.com/netobserv/flowlogs2metrics/pkg/test" | ||
| "github.com/stretchr/testify/require" | ||
| "testing" | ||
| ) | ||
|
|
||
| func Test_transformToLoki(t *testing.T) { | ||
|
|
@@ -54,18 +57,19 @@ pipeline: | |
| transform: | ||
| - type: generic | ||
| generic: | ||
| - input: Bytes | ||
| output: fl2m_bytes | ||
| - input: DstAddr | ||
| output: fl2m_dstAddr | ||
| - input: DstPort | ||
| output: fl2m_dstPort | ||
| - input: Packets | ||
| output: fl2m_packets | ||
| - input: SrcAddr | ||
| output: fl2m_srcAddr | ||
| - input: SrcPort | ||
| output: fl2m_srcPort | ||
| rules: | ||
| - input: Bytes | ||
| output: fl2m_bytes | ||
| - input: DstAddr | ||
| output: fl2m_dstAddr | ||
| - input: DstPort | ||
| output: fl2m_dstPort | ||
| - input: Packets | ||
| output: fl2m_packets | ||
| - input: SrcAddr | ||
| output: fl2m_srcAddr | ||
| - input: SrcPort | ||
| output: fl2m_srcPort | ||
| extract: | ||
| type: none | ||
| encode: | ||
|
|
@@ -75,24 +79,9 @@ pipeline: | |
| ` | ||
|
|
||
| func Test_SimplePipeline(t *testing.T) { | ||
| var json = jsoniter.ConfigCompatibleWithStandardLibrary | ||
| var mainPipeline *Pipeline | ||
| var err error | ||
| var b []byte | ||
| v := test.InitConfig(t, configTemplate) | ||
| config.Opt.PipeLine.Ingest.Type = "file" | ||
| config.Opt.PipeLine.Decode.Type = "json" | ||
| config.Opt.PipeLine.Extract.Type = "none" | ||
| config.Opt.PipeLine.Encode.Type = "none" | ||
| config.Opt.PipeLine.Write.Type = "none" | ||
| config.Opt.PipeLine.Ingest.File.Filename = "../../hack/examples/ocp-ipfix-flowlogs.json" | ||
| loadGlobalConfig(t) | ||
|
|
||
| val := v.Get("pipeline.transform\n") | ||
| b, err = json.Marshal(&val) | ||
| require.NoError(t, err) | ||
| config.Opt.PipeLine.Transform = string(b) | ||
|
|
||
| mainPipeline, err = NewPipeline() | ||
| mainPipeline, err := NewPipeline() | ||
| require.NoError(t, err) | ||
|
|
||
| // The file ingester reads the entire file, pushes it down the pipeline, and then exits | ||
|
|
@@ -102,6 +91,54 @@ func Test_SimplePipeline(t *testing.T) { | |
| ingester := mainPipeline.Ingester.(*ingest.IngestFile) | ||
| decoder := mainPipeline.Decoder.(*decode.DecodeJson) | ||
| writer := mainPipeline.Writer.(*write.WriteNone) | ||
| require.Equal(t, 5103, len(ingester.PrevRecords)) | ||
| require.Equal(t, len(ingester.PrevRecords), len(decoder.PrevRecords)) | ||
| require.Equal(t, len(ingester.PrevRecords), len(writer.PrevRecords)) | ||
|
|
||
| // checking that the processing is done for at least the first line of the logs | ||
| require.Equal(t, ingester.PrevRecords[0], decoder.PrevRecords[0]) | ||
| // values checked from the first line of the ../../hack/examples/ocp-ipfix-flowlogs.json file | ||
| require.Equal(t, config.GenericMap{ | ||
| "fl2m_bytes": float64(20800), | ||
| "fl2m_dstAddr": "10.130.2.2", | ||
| "fl2m_dstPort": float64(36936), | ||
| "fl2m_packets": float64(400), | ||
| "fl2m_srcAddr": "10.130.2.13", | ||
| "fl2m_srcPort": float64(3100), | ||
| }, writer.PrevRecords[0]) | ||
| } | ||
|
|
||
| func BenchmarkPipeline(b *testing.B) { | ||
| logrus.StandardLogger().SetLevel(logrus.ErrorLevel) | ||
| t := &testing.T{} | ||
| loadGlobalConfig(t) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mariomac can you look on https://github.com/netobserv/flowlogs-pipeline/blob/main/Makefile#L88 --- maybe we can somehow improve this >>> ???? I started to create some benchmark area for the project and I totally agree we need to improve that
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mariomac In any event, can we agree to create dedicated go files just for benchmark and split from the rest of the tests so we can run those stand-alone??
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had a look but doesn't seem to work for me... on each invocation I got: So I decided to create also a very pipeline-specific test to compare the part we are evaluating to change.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With respect to your second question, we could do that if you prefer it. Anyway, benchmarks are not run by default even if they are in the same file as the tests. If you mean skipping tests when you run benchmarks, you can add the `-run` filter flag. But I'm fine if you feel it's better organizing everything in the same benchmarks file.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @eranra I see what's happening with the benchmark in Go benchmarks are more a sort of "micro-benchmarks" aimed to test some parts of the code, and that's why they are usually located in the test files of the components that they are benchmarking. For example, in the benchmark of this PR, it just tests the time of sending and processing a file of ~5000 flows with a very simple dummy pipeline (no real ingest, no real writing...), but it allows us measuring the impact of the sequential vs parallel pipeline mechanism. I'd suggest to (in another PR, to not lose the focus of our current task) remove the current
In the future, this could be improved e.g. spinning parallel clients |
||
| config.Opt.PipeLine.Ingest.Type = "file_chunks" | ||
| if t.Failed() { | ||
| b.Fatalf("unexpected error loading config") | ||
| } | ||
| for n := 0; n < b.N; n++ { | ||
| b.StopTimer() | ||
| p, err := NewPipeline() | ||
| if err != nil { | ||
| t.Fatalf("unexpected error %s", err) | ||
| } | ||
| b.StartTimer() | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where is the timing information used?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| p.Run() | ||
| } | ||
| } | ||
|
|
||
| func loadGlobalConfig(t *testing.T) { | ||
| var json = jsoniter.ConfigCompatibleWithStandardLibrary | ||
| v := test.InitConfig(t, configTemplate) | ||
| config.Opt.PipeLine.Ingest.Type = "file" | ||
| config.Opt.PipeLine.Decode.Type = "json" | ||
| config.Opt.PipeLine.Extract.Type = "none" | ||
| config.Opt.PipeLine.Encode.Type = "none" | ||
| config.Opt.PipeLine.Write.Type = "none" | ||
| config.Opt.PipeLine.Ingest.File.Filename = "../../hack/examples/ocp-ipfix-flowlogs.json" | ||
|
|
||
| val := v.Get("pipeline.transform") | ||
| b, err := json.Marshal(&val) | ||
| require.NoError(t, err) | ||
| config.Opt.PipeLine.Transform = string(b) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
goland editor will re-order these imports to be in alphabetical order.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In `netobserv` we usually sort the imports with `goimports`, which is independent of the IDE (many team members use VScode). It can be configured to be used from Goland/IDEA too. But if this is an inconvenience for you I can adapt to use the default Goland. WDYT @jotak @jpinsonneau @OlivierCazade ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm also using `goimports` (configured in vscode)