-
Notifications
You must be signed in to change notification settings - Fork 0
/
error_quotient.go
294 lines (244 loc) · 8.84 KB
/
error_quotient.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
// Package errorquotient turns logged entries into per-file compilation events
// and scores consecutive events into an error quotient for each participant.
package errorquotient
import (
"database/sql"
"fmt"
"strings"
"github.com/nedpals/bugbuddy/server/logger"
"github.com/nedpals/bugbuddy/server/logger/analyzer"
"github.com/nedpals/bugbuddy/server/logger/analyzer/internal"
"github.com/sergi/go-diff/diffmatchpatch"
)
// KEY is the first argument Analyze passes to the writer for every error
// quotient value it emits.
const KEY = "error_quotient"
// CompilationEvent describes the step from one logged entry of a file to the
// next one, as built by Analyze.
type CompilationEvent struct {
	// ErrorType is 0 when the first entry of the pair had no error and
	// non-zero otherwise.
	ErrorType int

	// TimeDelta is the number of whole seconds between the creation times of
	// the two entries.
	TimeDelta int

	// CharDelta is the size of the edit between the two file versions, as
	// reported by CalculateCharDeltaAndLocation.
	CharDelta int

	// Location is the textual description of the edit, as reported by
	// CalculateCharDeltaAndLocation. scoreEventPair compares it for equality.
	Location string
}
// scoreEventPair scores two consecutive compilation events.
//
// The result is:
//   - 0 when neither event has an error,
//   - 1 when exactly one of them has an error,
//   - 2 when both have errors of different types,
//   - 5 when both have the same error type at different locations,
//   - 8 when both have the same error type at the same location.
//
// An event counts as an error when its ErrorType is non-zero. Location is only
// compared once the error types match.
func scoreEventPair(event1, event2 CompilationEvent) int {
	firstFailed := event1.ErrorType != 0
	secondFailed := event2.ErrorType != 0

	switch {
	case firstFailed && secondFailed:
		score := 2
		if event1.ErrorType != event2.ErrorType {
			return score
		}
		score += 3
		if event1.Location == event2.Location {
			score += 3
		}
		return score
	case firstFailed || secondFailed:
		return 1
	default:
		return 0
	}
}
// normalizeScore rescales a raw pair score by dividing it by 9.
//
// NOTE(review): the highest value scoreEventPair in this file produces is 8,
// so a normalized score never reaches 1 — confirm that 9 is the intended
// divisor.
func normalizeScore(score int) float64 {
	const divisor = 9.0
	return float64(score) / divisor
}
// calculateEQ returns the mean normalized score over every pair of adjacent
// events. It returns 0 when there are fewer than two events, since no pair
// can be formed.
func calculateEQ(events []CompilationEvent) float64 {
	if len(events) < 2 {
		return 0
	}

	pairs := len(events) - 1
	sum := 0.0
	// events[1:][idx] is the successor of events[idx].
	for idx, next := range events[1:] {
		sum += normalizeScore(scoreEventPair(events[idx], next))
	}
	return sum / float64(pairs)
}
// ErrorTypeConversion maps a logged error code onto the error type used when
// scoring events: 0 stays 0 (no error) and every other code becomes 1.
//
// Because all non-zero codes collapse to the same value, distinct error codes
// are indistinguishable after conversion.
func ErrorTypeConversion(errorCode int) int {
	if errorCode == 0 {
		return 0
	}
	return 1
}
// CalculateCharDeltaAndLocation diffs two logged versions of filepath and
// reports how much text changed and what the changed text was.
//
// charDelta is the number of characters (runes) inserted plus deleted between
// the two versions. location is a ", "-separated list of `Deleted: "..."` and
// `Inserted: "..."` fragments in diff order; it is empty when nothing changed.
//
// A version that cannot be found (sql.ErrNoRows) is tolerated only when the
// corresponding entry has ErrorCode 0:
//   - version1 missing: the diff uses whatever content the failed lookup
//     returned (presumably empty — confirm against the logger).
//   - version2 missing: the latest stored version of filepath is used instead.
//
// A missing version on an entry with a non-zero ErrorCode, a failure to
// determine the latest version, or any other lookup failure is returned as an
// error. When version1 has FileVersion 0 and its GeneratedOutput starts with
// "# UnknownError", (0, "", nil) is returned without consulting the logger.
func CalculateCharDeltaAndLocation(log *logger.Logger, filepath string, version1, version2 logger.LogEntry) (charDelta int, location string, err error) {
	// Special case: the file was never found and the output is "# UnknownError".
	if version1.FileVersion == 0 && strings.HasPrefix(version1.GeneratedOutput, "# UnknownError") {
		return 0, "", nil
	}

	content1, err := log.OpenVersionedFileFromPID(version1.ParticipantId, filepath, version1.FileVersion)
	if err != nil {
		if err != sql.ErrNoRows {
			return 0, "", err
		}
		if version1.ErrorCode != 0 {
			return 0, "", fmt.Errorf("(%s) file not found: %s (version: %d)", version1.ParticipantId, filepath, version1.FileVersion)
		}
		// Missing first version of an error-free entry: carry on with
		// whatever content1 the lookup returned.
	}

	content2, err := log.OpenVersionedFileFromPID(version2.ParticipantId, filepath, version2.FileVersion)
	if err != nil {
		if err != sql.ErrNoRows {
			return 0, "", err
		}
		if version2.ErrorCode != 0 {
			return 0, "", fmt.Errorf("file not found: %s (version: %d)", filepath, version2.FileVersion)
		}

		// Fall back to the latest stored version. A lookup error is surfaced
		// instead of being dropped; the -1 result keeps returning verErr as
		// before.
		latestVersion, verErr := log.LatestVersionFromFile(filepath)
		if verErr != nil || latestVersion == -1 {
			return 0, "", verErr
		}

		newContent2, openErr := log.OpenVersionedFileFromPID(version2.ParticipantId, filepath, latestVersion)
		if openErr != nil {
			if openErr == sql.ErrNoRows {
				return 0, "", fmt.Errorf("(2nd) file not found: %s (version: %d)", filepath, latestVersion)
			}
			return 0, "", openErr
		}

		// NOTE(review): content2 at this point is the result of the failed
		// lookup above, not newContent2 — confirm this is the intended
		// baseline for an unversioned first entry.
		if version1.FileVersion == 0 {
			content1 = content2
		}
		content2 = newContent2
	}

	diffs := diffmatchpatch.New().DiffMain(string(content1), string(content2), false)

	// One pass collects both the size of the edit and its description.
	// Sizes are counted in runes so multi-byte characters count once.
	var locations []string
	for _, diff := range diffs {
		switch diff.Type {
		case diffmatchpatch.DiffDelete:
			charDelta += len([]rune(diff.Text))
			locations = append(locations, fmt.Sprintf("Deleted: %q", diff.Text))
		case diffmatchpatch.DiffInsert:
			charDelta += len([]rune(diff.Text))
			locations = append(locations, fmt.Sprintf("Inserted: %q", diff.Text))
		}
	}

	return charDelta, strings.Join(locations, ", "), nil
}
// ErrorQuotientAnalysisResult is what Analyze collects for one participant.
type ErrorQuotientAnalysisResult struct {
	// compilationEvents holds the compilation events derived for the
	// participant, stored per file path.
	compilationEvents *internal.ResultStore[[]CompilationEvent]
}
// Analyzer computes error quotients from logged entries; see Analyze.
type Analyzer struct {
	// ErrorsByParticipant maps a participant ID to that participant's result.
	// NOTE(review): nothing in the visible part of this file assigns it;
	// Analyze keeps its results in a local map — confirm whether it is used.
	ErrorsByParticipant map[string]ErrorQuotientAnalysisResult
}
// getLastVersionNumberFromIdx walks entries backwards from idx (inclusive) and
// returns the FileVersion of the first entry whose FilePath equals filePath
// and whose FileVersion is non-zero. It returns 0 when no such entry exists.
//
// idx must be a valid index into entries (or negative, which yields 0).
func getLastVersionNumberFromIdx(entries []logger.LogEntry, filePath string, idx int) int {
	for pos := idx; pos >= 0; pos-- {
		candidate := &entries[pos]
		if candidate.FilePath != filePath || candidate.FileVersion == 0 {
			continue
		}
		return candidate.FileVersion
	}
	return 0
}
// Analyze builds compilation events from the entries of every loader and
// writes one error quotient per participant and file to writer under KEY.
//
// For each loader the entries are grouped by participant ID and by the
// nearest known file name. Entries whose ErrorMessage contains
// "error: file not found:" are ignored. Every pair of adjacent entries of a
// file becomes one CompilationEvent whose ErrorType comes from the first
// entry of the pair.
//
// Failures are handled best-effort: a loader that fails, or whose entries
// cannot be listed, is skipped silently; an entry that cannot be read is
// skipped (reading stops on a "no rows" error); a pair whose diff cannot be
// computed is reported on stdout and skipped. The returned error is always
// nil.
//
// A participant that appears in more than one loader keeps only the events
// from the last such loader.
func (e *Analyzer) Analyze(writer analyzer.KVWriter, loaders ...analyzer.LoggerLoader) error {
	eventsByParticipant := make(map[string]ErrorQuotientAnalysisResult)

	for _, load := range loaders {
		log, err := load()
		if err != nil {
			continue
		}

		rows, err := log.Entries()
		if err != nil {
			continue
		}

		// participant ID -> (file path -> entries in iteration order)
		grouped := make(map[string]*internal.ResultStore[[]logger.LogEntry])

		for rows.Next() {
			entry, err := rows.Value()
			if err != nil {
				if err.Error() != "sql: no rows in result set" {
					continue
				}
				// Nothing left to read.
				break
			}

			// A missing file is not the programmer's fault; leave it out.
			if strings.Contains(entry.ErrorMessage, "error: file not found:") {
				continue
			}

			store, seen := grouped[entry.ParticipantId]
			if !seen {
				store = internal.NewResultStore[[]logger.LogEntry]()
				grouped[entry.ParticipantId] = store
			}

			name := store.FilenameNearest(entry.FilePath)
			store.Set(name, append(store.GetOr(name, []logger.LogEntry{}), entry))
		}

		for pid, store := range grouped {
			// file path -> compilation events
			fileEvents := internal.NewResultStore[[]CompilationEvent]()

			for fileIdx, entries := range store.Values {
				filePath := store.FilenameNearest(store.Filenames[fileIdx])

				for i := 1; i < len(entries); i++ {
					prev, next := entries[i-1], entries[i]

					// Grouping by nearest file name can leave an entry
					// without a version number; borrow the most recent
					// known one.
					if prev.FileVersion == 0 {
						prev.FileVersion = getLastVersionNumberFromIdx(entries, filePath, i-1)
					}
					if next.FileVersion == 0 {
						if prev.FileVersion != 0 {
							next.FileVersion = prev.FileVersion
						} else {
							next.FileVersion = getLastVersionNumberFromIdx(entries, filePath, i)
						}
					}

					delta, where, err := CalculateCharDeltaAndLocation(log, filePath, prev, next)
					if err != nil {
						// TODO: replace it with proper error handling
						fmt.Printf("Error calculating char delta: %v\n", err)
						continue
					}

					soFar := fileEvents.GetOr(filePath, []CompilationEvent{})
					fileEvents.Set(filePath, append(soFar, CompilationEvent{
						ErrorType: ErrorTypeConversion(prev.ErrorCode),
						TimeDelta: int(next.CreatedAt.Time.Sub(prev.CreatedAt.Time).Seconds()),
						CharDelta: delta,
						Location:  where,
					}))
				}
			}

			eventsByParticipant[pid] = ErrorQuotientAnalysisResult{
				compilationEvents: fileEvents,
			}
		}
	}

	// Emit one error quotient per participant and file.
	for pid, result := range eventsByParticipant {
		store := result.compilationEvents
		for fileIdx, events := range store.Values {
			writer.Write(KEY, pid, store.Filenames[fileIdx], calculateEQ(events))
		}
	}

	return nil
}