Skip to content

Commit

Permalink
fix: Explicitly track if a $language/plain repository was solved, because the problem-counting approach can disqualify succeeding runs
Browse files Browse the repository at this point in the history

Part of #127
  • Loading branch information
bauersimon authored and zimmski committed May 18, 2024
1 parent 8434368 commit e4951b5
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 5 deletions.
18 changes: 13 additions & 5 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ func (command *Evaluate) Execute(args []string) (err error) {
}

// Check that models and languages can be evaluated by executing the "plain" repositories.
modelSucceededBasicChecksOfLanguage := map[model.Model]map[language.Language]bool{}
log.Printf("Checking that models and languages can be used for evaluation")
// Ensure we report metrics for every model even if they are excluded.
assessments := report.NewAssessmentPerModelPerLanguagePerRepository(maps.Values(modelsSelected), maps.Values(languagesSelected), command.Repositories)
Expand All @@ -254,6 +255,10 @@ func (command *Evaluate) Execute(args []string) (err error) {
for _, languageID := range command.Languages {
for _, modelID := range command.Models {
model := modelsSelected[modelID]
if modelSucceededBasicChecksOfLanguage[model] == nil {
modelSucceededBasicChecksOfLanguage[model] = map[language.Language]bool{}
}

language := languagesSelected[languageID]

repositoryPath := filepath.Join(languageID, repositoryPlainName)
Expand All @@ -264,8 +269,10 @@ func (command *Evaluate) Execute(args []string) (err error) {
ps = append(ps, err)
}
if len(ps) > 0 {
log.Printf("Excluding model %q since it was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps)
log.Printf("Model %q was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps)
problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...)
} else {
modelSucceededBasicChecksOfLanguage[model][language] = true
}
}
}
Expand Down Expand Up @@ -299,12 +306,13 @@ func (command *Evaluate) Execute(args []string) (err error) {
}

for _, modelID := range command.Models {
if len(problemsPerModel[modelID]) > 0 {
continue
}

model := modelsSelected[modelID]
language := languagesSelected[languageID]
if !modelSucceededBasicChecksOfLanguage[model][language] {
log.Printf("Excluding model %q for language %q cause it did not succeed basic checks", model.ID(), language.ID())

continue
}

assessment, ps, err := evaluate.Repository(command.logger, command.ResultPath, model, language, command.TestdataPath, repositoryPath)
assessments[model][language][repositoryPath].Add(assessment)
Expand Down
159 changes: 159 additions & 0 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -811,4 +811,163 @@ func TestEvaluateExecute(t *testing.T) {
})
}
})

// NOTE(review): these subtests cover the per-model/per-language tracking of
// "plain" basic-check success, replacing the earlier problem-counting approach
// that could disqualify succeeding runs (part of #127 per the commit message).
t.Run("Basic language checks should include or exclude model", func(t *testing.T) {
{
modelMock := modeltesting.NewMockModelNamed(t, "testing-provider/testing-model")
providerMock := providertesting.NewMockProviderNamedWithModels(t, "testing-provider", []model.Model{modelMock})
// Scenario: run 1 fails on the "light" repository but both "plain" checks
// succeed, so run 2 must still be executed for the model.
validate(t, &testCase{
Name: "Problems of previous runs shouldn't cancel successive runs",

Before: func(t *testing.T, logger *log.Logger, resultPath string) {
provider.Register(providerMock)

// Succeed on both "plain" runs.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()

// Error on the first run for the "light" repository.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, errors.New("some error")).Once()
// Succeed on the second run for the "light" repository.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()
},
After: func(t *testing.T, logger *log.Logger, resultPath string) {
delete(provider.Providers, "testing-provider")

// Four calls total: two "plain" checks, one failing and one succeeding "light" run.
modelMock.AssertNumberOfCalls(t, "GenerateTestsForFile", 4)
},

Arguments: []string{
"--repository", filepath.Join("java", "light"),
"--model", "testing-provider/testing-model",
"--runs", "2",
},

// NOTE(review): the summed CSV is named "golang-summed.csv" although the
// repositories under test are Java — confirm this matches the report's
// file-naming convention.
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": nil,
"evaluation.csv": nil,
"evaluation.log": nil,
"golang-summed.csv": func(t *testing.T, filePath, data string) {
_ = validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 3, // Three files were executed successfully: one "plain" run and two "light" runs.
metrics.AssessmentKeyResponseNoError: 3,
},
}, []uint64{6})
},
"models-summed.csv": nil,
"README.md": nil,
filepath.Join("testing-provider_testing-model", "java", "plain", "plain.log"): nil,
filepath.Join("testing-provider_testing-model", "java", "light", "plain.log"): nil,
},
})
}
{
modelMock := modeltesting.NewMockModelNamed(t, "testing-provider/testing-model")
providerMock := providertesting.NewMockProviderNamedWithModels(t, "testing-provider", []model.Model{modelMock})
// Scenario: the "plain" check fails on run 1 but succeeds on run 2 —
// one success is sufficient to keep the model included.
validate(t, &testCase{
Name: "Solving basic checks once is enough",

Before: func(t *testing.T, logger *log.Logger, resultPath string) {
provider.Register(providerMock)

// Succeed on only one "plain" run.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, errors.New("some error")).Once()
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()

// Succeed on both "light" runs.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}, nil).Run(func(args mock.Arguments) {
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}).Once()
},
After: func(t *testing.T, logger *log.Logger, resultPath string) {
delete(provider.Providers, "testing-provider")

// Four calls total: the failing and the succeeding "plain" check plus two "light" runs.
modelMock.AssertNumberOfCalls(t, "GenerateTestsForFile", 4)
},

Arguments: []string{
"--repository", filepath.Join("java", "light"),
"--model", "testing-provider/testing-model",
"--runs", "2",
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": nil,
"evaluation.csv": nil,
"evaluation.log": nil,
"golang-summed.csv": func(t *testing.T, filePath, data string) {
_ = validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 3, // Three files were executed successfully: one "plain" run and two "light" runs.
metrics.AssessmentKeyResponseNoError: 3,
},
}, []uint64{6})
},
"models-summed.csv": nil,
"README.md": nil,
filepath.Join("testing-provider_testing-model", "java", "plain", "plain.log"): nil,
filepath.Join("testing-provider_testing-model", "java", "light", "plain.log"): nil,
},
})
}
{
modelMock := modeltesting.NewMockModelNamed(t, "testing-provider/testing-model")
providerMock := providertesting.NewMockProviderNamedWithModels(t, "testing-provider", []model.Model{modelMock})
// Scenario: the "plain" check fails on every run, so the model is
// excluded and no summed CSVs are produced.
validate(t, &testCase{
Name: "Never solving basic checks leads to exclusion",

Before: func(t *testing.T, logger *log.Logger, resultPath string) {
provider.Register(providerMock)

// Error on every "plain" run.
modelMock.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, errors.New("some error"))
},
After: func(t *testing.T, logger *log.Logger, resultPath string) {
delete(provider.Providers, "testing-provider")

// Only the two failing "plain" checks run; the "light" repository is never attempted.
modelMock.AssertNumberOfCalls(t, "GenerateTestsForFile", 2)
},

Arguments: []string{
"--repository", filepath.Join("java", "light"),
"--model", "testing-provider/testing-model",
"--runs", "2",
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
"categories.svg": nil,
"evaluation.csv": nil,
"evaluation.log": nil,
"golang-summed.csv": nil,
"models-summed.csv": nil,
"README.md": nil,
filepath.Join("testing-provider_testing-model", "java", "light", "light.log"): nil,
},
})
}
})
}

0 comments on commit e4951b5

Please sign in to comment.