/
fileAnalyzer.go
183 lines (140 loc) · 4.81 KB
/
fileAnalyzer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
package logAnalyzer
import (
"bufio"
"fmt"
"github.com/cheggaaa/pb"
"log"
"os"
"regexp"
"strconv"
"strings"
)
// IsFileOK scans the file at fileUrlToAnalyze line by line and reports whether
// it is free of findings.
//
// Every line is whitespace-trimmed and then checked twice: against the sliding
// window of preceding lines (checkForDuplicates) and against the configured
// expressions (IsLineOK). Each finding is appended to hits as a formatted
// string and sets result to false.
//
// Parameters:
//   - regularExpressions: expressions handed to checkForDuplicates and IsLineOK.
//   - regexToIdentifyIgnoredParts: ignore rules handed to both checks.
//   - duplicateBufferSize: size of the duplicate window; checkForDuplicates
//     performs no comparison for values below 2.
//   - showProgress: when true, a byte-based progress bar is started, advanced
//     per line and finished.
//   - fileUrlToAnalyze: path passed to os.Open.
//
// The process is terminated (os.Exit(1) or log.Fatal) when the file cannot be
// opened, stat'ed or scanned to the end.
//
// NOTE(review): the duplicate window starts out filled with empty placeholder
// strings. A line that is empty after trimming can therefore compare equal to
// a placeholder near the start of the file and be reported with a non-positive
// source line number. Fixing this means starting with an empty window, which
// must be coordinated with how checkForDuplicates computes hitOffset - TODO
// confirm before changing.
func IsFileOK(regularExpressions []NamedRegEx, regexToIdentifyIgnoredParts []IgnoreRegEx, duplicateBufferSize int, showProgress bool, fileUrlToAnalyze string) (result bool, hits []string) {
	result = true

	fileToAnalyze, err := os.Open(fileUrlToAnalyze)
	if err != nil {
		fmt.Println("[ ERROR ] Could not open logfile.")
		fmt.Println()
		os.Exit(1)
	}
	defer fileToAnalyze.Close()

	fileStat, err := fileToAnalyze.Stat()
	if err != nil {
		log.Fatal(err)
	}
	fileSizeInBytes := fileStat.Size()

	bar := pb.New64(fileSizeInBytes).SetUnits(pb.U_BYTES)
	if showProgress {
		bar.Start()
	}

	// make panics on a negative length; a negative window size simply means
	// "no duplicate detection", which checkForDuplicates already handles.
	initialWindow := duplicateBufferSize
	if initialWindow < 0 {
		initialWindow = 0
	}
	lineBuffer := make([]string, initialWindow)

	scanner := bufio.NewScanner(fileToAnalyze)
	lineNumber := 0
	var bytesReported int64
	for scanner.Scan() {
		lineNumber++

		// Remember the untrimmed length: the progress bar counts file bytes,
		// not the bytes that survive trimming.
		rawLineLength := int64(len(scanner.Bytes()))
		line := strings.TrimSpace(scanner.Text())

		var duplicatesExist bool
		var hitOffset int
		duplicatesExist, hitOffset, lineBuffer = checkForDuplicates(regularExpressions, regexToIdentifyIgnoredParts, lineBuffer, line, duplicateBufferSize)
		if duplicatesExist {
			hits = append(hits, "FILE/"+REGEX_LEVEL_DUPLICATE+" #"+strconv.Itoa(lineNumber-hitOffset)+"/"+strconv.Itoa(lineNumber)+" - "+line)
			result = false
		}

		lineIsOk, hitString, hitRegEx := IsLineOK(regularExpressions, regexToIdentifyIgnoredParts, line)
		if !lineIsOk {
			hits = append(hits, hitRegEx.Level+"/"+hitRegEx.Name+" #"+strconv.Itoa(lineNumber)+" - "+hitString)
			result = false
		}

		if showProgress {
			// +1 accounts for the line terminator the scanner strips (files
			// with CRLF endings are under-reported by one byte per line).
			// Never report more than the file size, e.g. when the last line
			// has no terminator.
			step := rawLineLength + 1
			if remaining := fileSizeInBytes - bytesReported; step > remaining {
				step = remaining
			}
			bytesReported += step
			bar.Add(int(step))
		}
	}

	if showProgress {
		bar.Finish()
	}

	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}

	return result, hits
}
// checkForDuplicates compares newLine against the sliding window of previously
// seen lines and then records it in that window.
//
// Parameters:
//   - regularExpressions: entries with Level REGEX_LEVEL_DUPLICATE_TRIM are
//     removed from newLine before comparing; the whole slice is also handed
//     to match.
//   - regexToIdentifyIgnoredParts: if an entry with Level
//     REGEX_LEVEL_DUPLICATE matches newLine, the line is neither compared nor
//     recorded.
//   - buffer: the window of earlier (already trimmed) lines, oldest first. Its
//     backing array may be modified; always continue with outbuffer.
//   - newLine: the line to check.
//   - duplicateBufferSize: window size including the new line. Values below 2
//     disable the check.
//
// Returns:
//   - result: true if a line in the window matches newLine.
//   - hitOffset: how many window entries back the first (oldest) matching line
//     sits (1 = the most recently recorded line); 0 when result is false.
//   - outbuffer: the updated window.
//
// Patterns that fail to compile are skipped: an invalid ignore pattern ignores
// nothing and an invalid trim pattern trims nothing.
func checkForDuplicates(regularExpressions []NamedRegEx, regexToIdentifyIgnoredParts []IgnoreRegEx, buffer []string, newLine string, duplicateBufferSize int) (result bool, hitOffset int, outbuffer []string) {
	// if the buffer size is smaller than two there is nothing to compare
	if duplicateBufferSize < 2 {
		return false, 0, buffer
	}

	// Slide the window: keep at most duplicateBufferSize-1 earlier lines so
	// that the window is full again once the new line has been appended.
	// Using >= (instead of ==) also shrinks a buffer that was handed in
	// larger than the window.
	if len(buffer) >= duplicateBufferSize {
		kept := copy(buffer, buffer[len(buffer)-(duplicateBufferSize-1):])
		buffer = buffer[:kept]
	}

	// should the current line be ignored?
	for _, regEx := range regexToIdentifyIgnoredParts {
		if regEx.Level != REGEX_LEVEL_DUPLICATE {
			continue
		}
		ignored, err := regexp.MatchString(regEx.RegEx, newLine)
		if err == nil && ignored {
			return false, 0, buffer
		}
	}

	// trim the new line according to the expressions specified by the user
	trimmedLine := newLine
	for _, regEx := range regularExpressions {
		if regEx.Level != REGEX_LEVEL_DUPLICATE_TRIM {
			continue
		}
		r, err := regexp.Compile(regEx.RegEx)
		if err != nil {
			// Compile returns a nil *Regexp on error; using it would panic.
			continue
		}
		trimmedLine = r.ReplaceAllString(trimmedLine, "")
	}

	// Compare against the earlier lines first and append afterwards, so the
	// new line is never compared with itself. The offset is derived from the
	// actual number of buffered lines, which keeps it correct even while the
	// window is not completely filled.
	for i, bufferLine := range buffer {
		if match(regularExpressions, bufferLine, trimmedLine) {
			result = true
			hitOffset = len(buffer) - i
			break
		}
	}

	buffer = append(buffer, trimmedLine)
	return result, hitOffset, buffer
}
// match reports whether left and right count as duplicates of each other.
//
// Every entry of regularExpressions whose Level is
// REGEX_LEVEL_DUPLICATE_DIFFER marks a part of a line that MUST differ between
// two duplicates. For each such expression all matches are collected per line
// (joined with "-") and removed from the text that is compared for equality.
//
//   - Without any usable DIFFER expression the result is simply left == right.
//   - Otherwise the lines match only if the remaining text is identical AND
//     the collected "must differ" parts are not identical.
//
// Expressions that fail to compile are skipped, as if they were not
// configured.
func match(regularExpressions []NamedRegEx, left string, right string) (result bool) {
	foundDifferRegEx := false
	mustDifferLeft := ""
	mustDifferRight := ""
	trimmedLeft := left
	trimmedRight := right

	for _, namedRegEx := range regularExpressions {
		if namedRegEx.Level != REGEX_LEVEL_DUPLICATE_DIFFER {
			continue
		}

		regEx, err := regexp.Compile(namedRegEx.RegEx)
		if err != nil {
			// Compile returns a nil *Regexp on error; using it would panic.
			// An invalid pattern cannot select anything, so leave it out.
			continue
		}

		// The findings are taken from the untouched input lines, while the
		// removal accumulates on the trimmed copies.
		findingsLeft := regEx.FindAllString(left, -1)
		mustDifferLeft = mustDifferLeft + strings.Join(findingsLeft, "-")
		trimmedLeft = regEx.ReplaceAllString(trimmedLeft, "")

		findingsRight := regEx.FindAllString(right, -1)
		mustDifferRight = mustDifferRight + strings.Join(findingsRight, "-")
		trimmedRight = regEx.ReplaceAllString(trimmedRight, "")

		foundDifferRegEx = true
	}

	// no parts defined which should differ - so the result
	// is the comparison of the two (untrimmed) lines
	if !foundDifferRegEx {
		return trimmedLeft == trimmedRight
	}

	// there IS something defined which should differ, but if the
	// remaining parts are not equal the two lines cannot match
	if trimmedRight != trimmedLeft {
		return false
	}

	// the remaining parts are equal, so the lines are duplicates
	// exactly when the parts that must differ really do differ
	return mustDifferLeft != mustDifferRight
}