Skip to content

Commit

Permalink
fix, Explicitly track if a $language/plain repository was solved cause problem-counting approach can disqualify succeeding runs
Browse files Browse the repository at this point in the history

Part of #127
  • Loading branch information
bauersimon committed May 17, 2024
1 parent ccfc16a commit 6fb9a9e
Show file tree
Hide file tree
Showing 2 changed files with 188 additions and 5 deletions.
18 changes: 13 additions & 5 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ func (command *Evaluate) Execute(args []string) (err error) {
// Ensure we report metrics for every model even if they are excluded.
assessments := report.NewAssessmentPerModelPerLanguagePerRepository(maps.Values(modelsSelected), maps.Values(languagesSelected), command.Repositories)
problemsPerModel := map[string][]error{}
modelSucceededPlainRepositoryOfLanguage := map[model.Model]map[language.Language]bool{}
{
for r := uint(0); r < command.Runs; r++ {
if command.Runs > 1 {
Expand All @@ -252,6 +253,10 @@ func (command *Evaluate) Execute(args []string) (err error) {
for _, languageID := range command.Languages {
for _, modelID := range command.Models {
model := modelsSelected[modelID]
if modelSucceededPlainRepositoryOfLanguage[model] == nil {
modelSucceededPlainRepositoryOfLanguage[model] = map[language.Language]bool{}
}

language := languagesSelected[languageID]

repositoryPath := filepath.Join(languageID, repositoryPlainName)
Expand All @@ -262,8 +267,10 @@ func (command *Evaluate) Execute(args []string) (err error) {
ps = append(ps, err)
}
if len(ps) > 0 {
log.Printf("Excluding model %q since it was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps)
log.Printf("Model %q was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps)
problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
} else {
modelSucceededPlainRepositoryOfLanguage[model][language] = modelSucceededPlainRepositoryOfLanguage[model][language] || true
}
}
}
Expand Down Expand Up @@ -297,12 +304,13 @@ func (command *Evaluate) Execute(args []string) (err error) {
}

for _, modelID := range command.Models {
if len(problemsPerModel[modelID]) > 0 {
continue
}

model := modelsSelected[modelID]
language := languagesSelected[languageID]
if !modelSucceededPlainRepositoryOfLanguage[model][language] {
log.Printf("Skipping model %q cause it never solved the %q repository", model.ID(), filepath.Join(language.ID(), repositoryPlainName))

continue
}

assessment, ps, err := evaluate.Repository(command.logger, command.ResultPath, model, language, command.TestdataPath, repositoryPath)
assessments[model][language][repositoryPath].Add(assessment)
Expand Down
175 changes: 175 additions & 0 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -815,4 +815,179 @@ func TestEvaluateExecute(t *testing.T) {
})
}
})

t.Run("Plain evaluation", func(t *testing.T) {
{
// Scenario: with two runs, the model solves the "plain" repository both times
// but errors on the first "plain2" run. The second "plain2" run must still
// reach the model, so an earlier problem must not disqualify later runs.
//
// Setup provider and model mocking.
modelMock := modeltesting.NewMockModelNamed(t, "testing-provider/testing-model")
providerMock := providertesting.NewMockProviderNamedWithModels(t, "testing-provider", []model.Model{modelMock})
validate(t, &testCase{
Name: "Problems of previous runs shouldn't cancel successive runs",

Before: func(t *testing.T, logger *log.Logger, resultPath string, testdataPath string) {
// Provide a second repository "plain2" by copying the "plain" repository.
require.NoError(t, osutil.CopyTree(filepath.Join(testdataPath, "golang", "plain"), filepath.Join(testdataPath, "golang", "plain2")))

provider.Register(providerMock)

// All expectations below use the same matchers and are limited by "Once",
// so their registration order is meant to mirror the order of the calls.
// NOTE(review): this relies on both "plain" runs being evaluated before
// the "plain2" runs - confirm against the evaluation command.

// Succeed on both "plain" runs.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
// Write a minimal test file into the path given as call argument index 2.
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
// Write a minimal test file into the path given as call argument index 2.
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()

// Error on the first run for the "plain2" repository.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, errors.New("some error")).Once()
// Succeed on the second run for the "plain2" repository.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
// Write a minimal test file into the path given as call argument index 2.
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()
},
After: func(t *testing.T, logger *log.Logger, resultPath string, testdataPath string) {
// Remove the mock provider from the provider registry again.
delete(provider.Providers, "testing-provider")

// Two runs each for the "plain" and the "plain2" repository.
modelMock.AssertNumberOfCalls(t, "GenerateTestsForFile", 4)
},

Arguments: []string{
"--language", "golang",
"--repository", filepath.Join("golang", "plain2"),
"--model", "testing-provider/testing-model",
"--runs", "2",
},

// NOTE(review): entries mapped to nil presumably are only checked for
// existence - confirm against "validate".
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": nil,
"evaluation.csv": nil,
"evaluation.log": nil,
"golang-summed.csv": func(t *testing.T, filePath, data string) {
_ = validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 3, // Three files were executed successfully, for two "plain" runs and one "plain2" run.
metrics.AssessmentKeyResponseNoError: 3,
},
}, []uint{6})
},
"models-summed.csv": nil,
"README.md": nil,
filepath.Join("testing-provider_testing-model", "golang", "golang", "plain.log"): nil,
filepath.Join("testing-provider_testing-model", "golang", "golang", "plain2.log"): nil,
},
})
}
{
// Scenario: with two runs, the model errors on one "plain" run and solves the
// other. A single solved "plain" run must be enough for both "plain2" runs to
// reach the model.
//
// Setup provider and model mocking.
modelMock := modeltesting.NewMockModelNamed(t, "testing-provider/testing-model")
providerMock := providertesting.NewMockProviderNamedWithModels(t, "testing-provider", []model.Model{modelMock})
validate(t, &testCase{
Name: "Solving Plain once is enough",

Before: func(t *testing.T, logger *log.Logger, resultPath string, testdataPath string) {
// Provide a second repository "plain2" by copying the "plain" repository.
require.NoError(t, osutil.CopyTree(filepath.Join(testdataPath, "golang", "plain"), filepath.Join(testdataPath, "golang", "plain2")))

provider.Register(providerMock)

// All expectations below use the same matchers and are limited by "Once",
// so their registration order is meant to mirror the order of the calls.
// NOTE(review): this relies on both "plain" runs being evaluated before
// the "plain2" runs - confirm against the evaluation command.

// Succeed on only one "plain" run.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, errors.New("some error")).Once()
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
// Write a minimal test file into the path given as call argument index 2.
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()

// Succeed on both "plain2" runs.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
// Write a minimal test file into the path given as call argument index 2.
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
// Write a minimal test file into the path given as call argument index 2.
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()
},
After: func(t *testing.T, logger *log.Logger, resultPath string, testdataPath string) {
// Remove the mock provider from the provider registry again.
delete(provider.Providers, "testing-provider")

// Two runs each for the "plain" and the "plain2" repository.
modelMock.AssertNumberOfCalls(t, "GenerateTestsForFile", 4)
},

Arguments: []string{
"--language", "golang",
"--repository", filepath.Join("golang", "plain2"),
"--model", "testing-provider/testing-model",
"--runs", "2",
},

// NOTE(review): entries mapped to nil presumably are only checked for
// existence - confirm against "validate".
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": nil,
"evaluation.csv": nil,
"evaluation.log": func(t *testing.T, filePath, data string) {
// The failed "plain" run must be logged without excluding the model.
assert.Contains(t, data, "Model \"testing-provider/testing-model\" was not able to solve the \""+filepath.Join("golang", "plain")+"\" repository")
},
"golang-summed.csv": func(t *testing.T, filePath, data string) {
_ = validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 3, // Three files were executed successfully, for one "plain" run and two "plain2" runs.
metrics.AssessmentKeyResponseNoError: 3,
},
}, []uint{6})
},
"models-summed.csv": nil,
"README.md": nil,
filepath.Join("testing-provider_testing-model", "golang", "golang", "plain.log"): nil,
filepath.Join("testing-provider_testing-model", "golang", "golang", "plain2.log"): nil,
},
})
}
{
// Scenario: with two runs, the model errors on every call, so it never solves
// the "plain" repository. The "plain2" runs must then be skipped, which the
// log assertion and the expected call count below verify.
//
// Setup provider and model mocking.
modelMock := modeltesting.NewMockModelNamed(t, "testing-provider/testing-model")
providerMock := providertesting.NewMockProviderNamedWithModels(t, "testing-provider", []model.Model{modelMock})
validate(t, &testCase{
Name: "Never solving Plain leads to exclusion",

Before: func(t *testing.T, logger *log.Logger, resultPath string, testdataPath string) {
// Provide a second repository "plain2" by copying the "plain" repository.
require.NoError(t, osutil.CopyTree(filepath.Join(testdataPath, "golang", "plain"), filepath.Join(testdataPath, "golang", "plain2")))

provider.Register(providerMock)

// Error on every "plain" run.
// The expectation has no "Once" limit, so it answers every call with an error.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, errors.New("some error"))
},
After: func(t *testing.T, logger *log.Logger, resultPath string, testdataPath string) {
// Remove the mock provider from the provider registry again.
delete(provider.Providers, "testing-provider")

// Only the two "plain" runs reach the model, the "plain2" runs are skipped.
modelMock.AssertNumberOfCalls(t, "GenerateTestsForFile", 2)
},

Arguments: []string{
"--language", "golang",
"--repository", filepath.Join("golang", "plain2"),
"--model", "testing-provider/testing-model",
"--runs", "2",
},

// Only a log file for "plain" is listed here, none for "plain2".
// NOTE(review): entries mapped to nil presumably are only checked for
// existence - confirm against "validate".
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": nil,
"evaluation.csv": nil,
"evaluation.log": func(t *testing.T, filePath, data string) {
// The skip message must be logged once per run.
assert.Equal(t, 2, strings.Count(data, "Skipping model \"testing-provider/testing-model\" cause it never solved the \""+filepath.Join("golang", "plain")+"\" repository"))
},
"golang-summed.csv": nil,
"models-summed.csv": nil,
"README.md": nil,
filepath.Join("testing-provider_testing-model", "golang", "golang", "plain.log"): nil,
},
})
}
})
}

0 comments on commit 6fb9a9e

Please sign in to comment.