Skip to content

Commit

Permalink
fix: clean up UTs for file metrics collector (#2285)
Browse files Browse the repository at this point in the history
* chore: replace testDir with tempDir.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* fix: expose and compare errors.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* refactor: integrate test generation func into testCases.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* refactor: update error comparing mechanism.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* fix: make some changes under the review of yuki.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

---------

Signed-off-by: Electronic-Waste <2690692950@qq.com>
  • Loading branch information
Electronic-Waste committed Apr 3, 2024
1 parent 9680b8c commit 7df05c2
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 166 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package sidecarmetricscollector

import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
Expand All @@ -34,15 +35,27 @@ import (
"github.com/kubeflow/katib/pkg/metricscollector/v1beta1/common"
)

var (
errFileFormat = fmt.Errorf("format must be set %v or %v", commonv1beta1.TextFormat, commonv1beta1.JsonFormat)
errOpenFile = errors.New("failed to open the file")
errReadFile = errors.New("failed to read the file")
errParseJson = errors.New("failed to parse the json object")
)

func CollectObservationLog(fileName string, metrics []string, filters []string, fileFormat commonv1beta1.FileFormat) (*v1beta1.ObservationLog, error) {
// we should check fileFormat first in case of opening an invalid file
if fileFormat != commonv1beta1.JsonFormat && fileFormat != commonv1beta1.TextFormat {
return nil, errFileFormat
}

file, err := os.Open(fileName)
if err != nil {
return nil, err
return nil, fmt.Errorf("%w: %s", errOpenFile, err.Error())
}
defer file.Close()
content, err := io.ReadAll(file)
if err != nil {
return nil, err
return nil, fmt.Errorf("%w: %s", errReadFile, err.Error())
}
logs := string(content)

Expand All @@ -51,8 +64,9 @@ func CollectObservationLog(fileName string, metrics []string, filters []string,
return parseLogsInTextFormat(strings.Split(logs, "\n"), metrics, filters)
case commonv1beta1.JsonFormat:
return parseLogsInJsonFormat(strings.Split(logs, "\n"), metrics)
default:
return nil, nil
}
return nil, fmt.Errorf("format must be set %v or %v", commonv1beta1.TextFormat, commonv1beta1.JsonFormat)
}

func parseLogsInTextFormat(logs []string, metrics []string, filters []string) (*v1beta1.ObservationLog, error) {
Expand Down Expand Up @@ -120,7 +134,7 @@ func parseLogsInJsonFormat(logs []string, metrics []string) (*v1beta1.Observatio
}
var jsonObj map[string]interface{}
if err := json.Unmarshal([]byte(logline), &jsonObj); err != nil {
return nil, err
return nil, fmt.Errorf("%w: %s", errParseJson, err.Error())
}

timestamp := time.Time{}.UTC().Format(time.RFC3339)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,52 +23,29 @@ import (
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
commonv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/common/v1beta1"
v1beta1 "github.com/kubeflow/katib/pkg/apis/manager/v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
)

const (
validJSONTestFile = "good.json"
invalidFormatJSONTestFile = "invalid-format.json"
invalidTimestampJSONTestFile = "invalid-timestamp.json"
missingMetricJSONTestFile = "missing-objective-metric.json"

validTEXTTestFile = "good.log"
invalidValueTEXTTestFile = "invalid-value.log"
invalidFormatTEXTTestFile = "invalid-format.log"
invalidTimestampTEXTTestFile = "invalid-timestamp.log"
missingMetricTEXTTestFile = "missing-objective-metric.log"
)

var (
testDir = "testdata"
testJsonDataPath = filepath.Join(testDir, "JSON")
testTextDataPath = filepath.Join(testDir, "TEXT")
)

func TestCollectObservationLog(t *testing.T) {
if err := generateTestDirs(); err != nil {
t.Fatal(err)
}
if err := generateJSONTestFiles(); err != nil {
t.Fatal(err)
}
if err := generateTEXTTestFiles(); err != nil {
t.Fatal(err)
}
defer os.RemoveAll(testDir)

testCases := map[string]struct {
filePath string
fileName string
testData string
metrics []string
filters []string
fileFormat commonv1beta1.FileFormat
err bool
wantError error
expected *v1beta1.ObservationLog
}{
"Positive case for logs in JSON format": {
filePath: filepath.Join(testJsonDataPath, validJSONTestFile),
fileName: "good.json",
testData: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"}
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847.287801, "trial": "0"}
{"checkpoint_path": "", "global_step": "1", "loss": "0.1414974331855774", "timestamp": "2021-12-02T14:27:50.000035161Z", "trial": "0"}
{"acc": "0.9586416482925415", "checkpoint_path": "", "global_step": "1", "timestamp": "2021-12-02T14:27:50.000037459Z", "trial": "0"}
{"checkpoint_path": "", "global_step": "2", "loss": "0.10683439671993256", "trial": "0"}`,
metrics: []string{"acc", "loss"},
fileFormat: commonv1beta1.JsonFormat,
expected: &v1beta1.ObservationLog{
Expand Down Expand Up @@ -112,7 +89,14 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Positive case for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, validTEXTTestFile),
fileName: "good.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.8078};{metricName: loss, metricValue: 0.5183}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 100}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -0.4759}
{metricName: loss, metricValue: 0.8671}`,
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
fileFormat: commonv1beta1.TextFormat,
Expand Down Expand Up @@ -178,7 +162,11 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Invalid case for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, invalidValueTEXTTestFile),
fileName: "invalid-value.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: .333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: - 345.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.}`,
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
metrics: []string{"accuracy", "loss"},
fileFormat: commonv1beta1.TextFormat,
Expand All @@ -195,21 +183,26 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Invalid file name": {
filePath: "invalid",
err: true,
fileName: "invalid",
fileFormat: commonv1beta1.JsonFormat,
wantError: errOpenFile,
},
"Invalid file format": {
filePath: filepath.Join(testTextDataPath, validTEXTTestFile),
fileName: "good.log",
fileFormat: "invalid",
err: true,
wantError: errFileFormat,
},
"Invalid formatted file for logs in JSON format": {
filePath: filepath.Join(testJsonDataPath, invalidFormatJSONTestFile),
fileName: "invalid-format.json",
testData: `"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847.287801, "trial": "0`,
fileFormat: commonv1beta1.JsonFormat,
err: true,
wantError: errParseJson,
},
"Invalid formatted file for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, invalidFormatTEXTTestFile),
fileName: "invalid-format.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}`,
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
metrics: []string{"accuracy", "loss"},
fileFormat: commonv1beta1.TextFormat,
Expand All @@ -226,7 +219,9 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Invalid timestamp for logs in JSON format": {
filePath: filepath.Join(testJsonDataPath, invalidTimestampJSONTestFile),
fileName: "invalid-timestamp.json",
testData: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": "invalid", "trial": "0"}
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847, "trial": "0"}`,
fileFormat: commonv1beta1.JsonFormat,
metrics: []string{"acc", "loss"},
expected: &v1beta1.ObservationLog{
Expand All @@ -249,7 +244,9 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Invalid timestamp for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, invalidTimestampTEXTTestFile),
fileName: "invalid-timestamp.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
invalid INFO {metricName: loss, metricValue: 0.3634}`,
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
fileFormat: commonv1beta1.TextFormat,
Expand All @@ -273,7 +270,10 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Missing objective metric in JSON training logs": {
filePath: filepath.Join(testJsonDataPath, missingMetricJSONTestFile),
fileName: "missing-objective-metric.json",
testData: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"}
{"checkpoint_path": "", "global_step": "1", "loss": "0.1414974331855774", "timestamp": "2021-12-02T14:27:50.000035161+09:00", "trial": "0"}
{"checkpoint_path": "", "global_step": "2", "loss": "0.10683439671993256", "trial": "0"}`,
fileFormat: commonv1beta1.JsonFormat,
metrics: []string{"acc", "loss"},
expected: &v1beta1.ObservationLog{
Expand All @@ -289,7 +289,9 @@ func TestCollectObservationLog(t *testing.T) {
},
},
"Missing objective metric in TEXT training logs": {
filePath: filepath.Join(testTextDataPath, missingMetricTEXTTestFile),
fileName: "missing-objective-metric.log",
testData: `2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.8671}`,
fileFormat: commonv1beta1.TextFormat,
metrics: []string{"accuracy", "loss"},
expected: &v1beta1.ObservationLog{
Expand All @@ -306,126 +308,21 @@ func TestCollectObservationLog(t *testing.T) {
},
}

tmpDir := t.TempDir()
for name, test := range testCases {
t.Run(name, func(t *testing.T) {
actual, err := CollectObservationLog(test.filePath, test.metrics, test.filters, test.fileFormat)
if (err != nil) != test.err {
t.Errorf("\nGOT: \n%v\nWANT: %v\n", err, test.err)
} else {
if diff := cmp.Diff(test.expected, actual); diff != "" {
t.Errorf("Unexpected parsed result (-want,+got):\n%s", diff)
if test.testData != "" {
if err := os.WriteFile(filepath.Join(tmpDir, test.fileName), []byte(test.testData), 0600); err != nil {
t.Fatalf("failed to write test data: %v", err)
}
}
actual, err := CollectObservationLog(filepath.Join(tmpDir, test.fileName), test.metrics, test.filters, test.fileFormat)
if diff := cmp.Diff(test.wantError, err, cmpopts.EquateErrors()); len(diff) != 0 {
t.Errorf("Unexpected error (-want,+got):\n%s", diff)
}
if diff := cmp.Diff(test.expected, actual); len(diff) != 0 {
t.Errorf("Unexpected parsed result (-want,+got):\n%s", diff)
}
})
}
}

func generateTestDirs() error {
// Generate JSON files' dir
if _, err := os.Stat(testJsonDataPath); err != nil {
if err = os.MkdirAll(testJsonDataPath, 0700); err != nil {
return err
}
}

// Generate TEXT files' dir
if _, err := os.Stat(testTextDataPath); err != nil {
if err = os.MkdirAll(testTextDataPath, 0700); err != nil {
return err
}
}

return nil
}

func generateJSONTestFiles() error {
testData := []struct {
fileName string
data string
}{
{
fileName: validJSONTestFile,
data: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"}
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847.287801, "trial": "0"}
{"checkpoint_path": "", "global_step": "1", "loss": "0.1414974331855774", "timestamp": "2021-12-02T14:27:50.000035161Z", "trial": "0"}
{"acc": "0.9586416482925415", "checkpoint_path": "", "global_step": "1", "timestamp": "2021-12-02T14:27:50.000037459Z", "trial": "0"}
{"checkpoint_path": "", "global_step": "2", "loss": "0.10683439671993256", "trial": "0"}
`,
},
{
fileName: invalidFormatJSONTestFile,
data: `"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847.287801, "trial": "0
`,
},
{
fileName: invalidTimestampJSONTestFile,
data: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": "invalid", "trial": "0"}
{"acc": "0.9349666833877563", "checkpoint_path": "", "global_step": "0", "timestamp": 1638422847, "trial": "0"}
`,
}, {
fileName: missingMetricJSONTestFile,
data: `{"checkpoint_path": "", "global_step": "0", "loss": "0.22082142531871796", "timestamp": 1638422847.28721, "trial": "0"}
{"checkpoint_path": "", "global_step": "1", "loss": "0.1414974331855774", "timestamp": "2021-12-02T14:27:50.000035161+09:00", "trial": "0"}
{"checkpoint_path": "", "global_step": "2", "loss": "0.10683439671993256", "trial": "0"}`,
},
}

for _, td := range testData {
filePath := filepath.Join(testJsonDataPath, td.fileName)
if err := os.WriteFile(filePath, []byte(td.data), 0600); err != nil {
return err
}
}

return nil
}

func generateTEXTTestFiles() error {
testData := []struct {
fileName string
data string
}{
{
fileName: validTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.8078};{metricName: loss, metricValue: 0.5183}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 100}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -0.4759}
{metricName: loss, metricValue: 0.8671}`,
},
{
fileName: invalidValueTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: .333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: -.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: - 345.333}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 888.}`,
},
{
fileName: invalidFormatTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}`,
},
{
fileName: invalidTimestampTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
invalid INFO {metricName: loss, metricValue: 0.3634}`,
},
{
fileName: missingMetricTEXTTestFile,
data: `2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.8671}`,
},
}

for _, td := range testData {
filePath := filepath.Join(testTextDataPath, td.fileName)
if err := os.WriteFile(filePath, []byte(td.data), 0600); err != nil {
return err
}
}

return nil
}

0 comments on commit 7df05c2

Please sign in to comment.